Print this page
13902 Fix for 13717 may break 8-disk raidz2
13915 installctx() blocking allocate causes problems
Portions contributed by: Jerry Jelinek <gjelinek@gmail.com>
Change-Id: I934d69946cec42630fc541fa8c7385b862b69ca2
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/intel/ia32/os/fpu.c
+++ new/usr/src/uts/intel/ia32/os/fpu.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2020 Joyent, Inc.
23 + * Copyright 2021 Joyent, Inc.
24 24 * Copyright 2021 RackTop Systems, Inc.
25 25 */
26 26
27 27 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
29 29 /* All Rights Reserved */
30 30
31 31 /* Copyright (c) 1987, 1988 Microsoft Corporation */
32 32 /* All Rights Reserved */
33 33
34 34 /*
35 35 * Copyright (c) 2009, Intel Corporation.
36 36 * All rights reserved.
37 37 */
38 38
39 39 #include <sys/types.h>
40 40 #include <sys/param.h>
41 41 #include <sys/signal.h>
42 42 #include <sys/regset.h>
43 43 #include <sys/privregs.h>
44 44 #include <sys/psw.h>
45 45 #include <sys/trap.h>
46 46 #include <sys/fault.h>
47 47 #include <sys/systm.h>
48 48 #include <sys/user.h>
49 49 #include <sys/file.h>
50 50 #include <sys/proc.h>
51 51 #include <sys/pcb.h>
52 52 #include <sys/lwp.h>
53 53 #include <sys/cpuvar.h>
54 54 #include <sys/thread.h>
55 55 #include <sys/disp.h>
56 56 #include <sys/fp.h>
57 57 #include <sys/siginfo.h>
58 58 #include <sys/archsystm.h>
59 59 #include <sys/kmem.h>
60 60 #include <sys/debug.h>
61 61 #include <sys/x86_archext.h>
62 62 #include <sys/sysmacros.h>
63 63 #include <sys/cmn_err.h>
64 64 #include <sys/kfpu.h>
65 65
66 66 /*
67 67 * FPU Management Overview
68 68 * -----------------------
69 69 *
70 70 * The x86 FPU has evolved substantially since its days as the x87 coprocessor;
71 71 * however, many aspects of its life as a coprocessor are still around in x86.
72 72 *
73 73 * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU.
74 74 * While that state still exists, there is much more that is covered by the FPU.
75 75 * Today, this includes not just traditional FPU state, but also supervisor only
76 76 * state. The following state is currently managed and covered logically by the
77 77 * idea of the FPU registers:
78 78 *
79 79 * o Traditional x87 FPU
80 80 * o Vector Registers (%xmm, %ymm, %zmm)
81 81 * o Memory Protection Extensions (MPX) Bounds Registers
82 82 * o Protected Key Rights Registers (PKRU)
83 83 * o Processor Trace data
84 84 *
85 85 * The rest of this covers how the FPU is managed and controlled, how state is
86 86 * saved and restored between threads, interactions with hypervisors, and other
87 87 * information exported to user land through aux vectors. A lot of background
88 88 * information is here to synthesize major parts of the Intel SDM, but
89 89 * unfortunately, it is not a replacement for reading it.
90 90 *
91 91 * FPU Control Registers
92 92 * ---------------------
93 93 *
94 94 * Because the x87 FPU began its life as a co-processor and the FPU was
95 95 * optional there are several bits that show up in %cr0 that we have to
96 96 * manipulate when dealing with the FPU. These are:
97 97 *
98 98 * o CR0.ET The 'extension type' bit. This was used originally to indicate
99 99 * that the FPU co-processor was present. Now it is forced on for
100 100 * compatibility. This is often used to verify whether or not the
101 101 * FPU is present.
102 102 *
103 103 * o CR0.NE The 'native error' bit. Used to indicate that native error
104 104 * mode should be enabled. This indicates that we should take traps
105 105 * on FPU errors. The OS enables this early in boot.
106 106 *
107 107 * o CR0.MP The 'Monitor Coprocessor' bit. Used to control whether or not
108 108 * wait/fwait instructions generate a #NM if CR0.TS is set.
109 109 *
110 110 * o CR0.EM The 'Emulation' bit. This is used to cause floating point
111 111 * operations (x87 through SSE4) to trap with a #UD so they can be
112 112 * emulated. The system never sets this bit, but makes sure it is
113 113 * clear on processor start up.
114 114 *
115 115 * o CR0.TS The 'Task Switched' bit. When this is turned on, a floating
116 116 * point operation will generate a #NM. An fwait will as well,
117 117 * depending on the value in CR0.MP.
118 118 *
119 119 * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by
120 120 * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more
121 121 * complicated role. Historically it has been used to allow running systems to
122 122 * restore the FPU registers lazily. This will be discussed in greater depth
123 123 * later on.
124 124 *
125 125 * %cr4 is also used as part of the FPU control. Specifically we need to worry
126 126 * about the following bits in the system:
127 127 *
128 128 * o CR4.OSFXSR This bit is used to indicate that the OS understands and
129 129 * supports the execution of the fxsave and fxrstor
130 130 * instructions. This bit is required to be set to enable
131 131 * the use of the SSE->SSE4 instructions.
132 132 *
133 133 * o CR4.OSXMMEXCPT This bit is used to indicate that the OS can understand
134 134 * and take a SIMD floating point exception (#XM). This bit
135 135 * is always enabled by the system.
136 136 *
137 137 * o CR4.OSXSAVE This bit is used to indicate that the OS understands and
138 138 * supports the execution of the xsave and xrstor family of
139 139 * instructions. This bit is required to use any of the AVX
140 140 * and newer feature sets.
141 141 *
142 142 * Because all supported processors are 64-bit, they'll always support the XMM
143 143 * extensions and we will enable both CR4.OSFXSR and CR4.OSXMMEXCPT in boot.
144 144 * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid.
145 145 *
146 146 * %xcr0 is used to manage the behavior of the xsave feature set and is only
147 147 * present on the system if xsave is supported. %xcr0 is read and written to
148 148 * through by the xgetbv and xsetbv instructions. This register is present
149 149 * whenever the xsave feature set is supported. Each bit in %xcr0 refers to a
150 150 * different component of the xsave state and controls whether or not that
151 151 * information is saved and restored. For newer feature sets like AVX and MPX,
152 152 * it also controls whether or not the corresponding instructions can be
153 153 * executed (much like CR4.OSFXSR does for the SSE feature sets).
154 154 *
155 155 * Everything in %xcr0 is around features available to users. There is also the
156 156 * IA32_XSS MSR which is used to control supervisor-only features that are still
157 157 * part of the xsave state. Bits that can be set in %xcr0 are reserved in
158 158 * IA32_XSS and vice versa. This is an important property that is particularly
159 159 * relevant to how the xsave instructions operate.
160 160 *
161 161 * Save Mechanisms
162 162 * ---------------
163 163 *
164 164 * When switching between running threads the FPU state needs to be saved and
165 165 * restored by the OS. If this state was not saved, users would rightfully
166 166 * complain about corrupt state. There are three mechanisms that exist on the
167 167 * processor for saving and restoring these state images:
168 168 *
169 169 * o fsave
170 170 * o fxsave
171 171 * o xsave
172 172 *
173 173 * fsave saves and restores only the x87 FPU and is the oldest of these
174 174 * mechanisms. This mechanism is never used in the kernel today because we are
175 175 * always running on systems that support fxsave.
176 176 *
177 177 * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register
178 178 * state to be saved and restored to and from a struct fxsave_state. This is the
179 179 * default mechanism that is used to save and restore the FPU on amd64. An
180 180 * important aspect of fxsave that was different from the original i386 fsave
181 181 * mechanism is that the restoring of FPU state with pending exceptions will not
182 182 * generate an exception, it will be deferred to the next use of the FPU.
183 183 *
184 184 * The final and by far the most complex mechanism is that of the xsave set.
185 185 * xsave allows for saving and restoring all of the traditional x86 pieces (x87
186 186 * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc.
187 187 * registers.
188 188 *
189 189 * Data is saved and restored into and out of a struct xsave_state. The first
190 190 * part of the struct xsave_state is equivalent to the struct fxsave_state.
191 191 * After that, there is a header which is used to describe the remaining
192 192 * portions of the state. The header is a 64-byte value of which the first two
193 193 * uint64_t values are defined and the rest are reserved and must be zero. The
194 194 * first uint64_t is the xstate_bv member. This describes which values in the
195 195 * xsave_state are actually valid and present. This is updated on a save and
196 196 * used on restore. The second member is the xcomp_bv member. Its last bit
197 197 * determines whether or not a compressed version of the structure is used.
198 198 *
199 199 * When the uncompressed structure is used (currently the only format we
200 200 * support), then each state component is at a fixed offset in the structure,
201 201 * even if it is not being used. For example, if you only saved the AVX related
202 202 * state, but did not save the MPX related state, the offset would not change
203 203 * for any component. With the compressed format, components that aren't used
204 204 * are all elided (though the x87 and SSE state are always there).
205 205 *
206 206 * Unlike fxsave which saves all state, the xsave family does not always save
207 207 * and restore all the state that could be covered by the xsave_state. The
208 208 * instructions all take an argument which is a mask of what to consider. This
209 209 * is the same mask that will be used in the xstate_bv vector and it is also the
210 210 * same values that are present in %xcr0 and IA32_XSS. Though IA32_XSS is only
211 211 * considered with the xsaves and xrstors instructions.
212 212 *
213 213 * When a save or restore is requested, a bitwise and is performed between the
214 214 * requested bits and those that have been enabled in %xcr0. Only the bits that
215 215 * match that are then saved or restored. Others will be silently ignored by
216 216 * the processor. This idea is used often in the OS. We will always request that
217 217 * we save and restore all of the state, but only those portions that are
218 218 * actually enabled in %xcr0 will be touched.
219 219 *
220 220 * If a feature has been asked to be restored that is not set in the xstate_bv
221 221 * feature vector of the save state, then it will be set to its initial state by
222 222 * the processor (usually zeros). Also, when asked to save state, the processor
223 223 * may not write out data that is in its initial state as an optimization. This
224 224 * optimization only applies to saving data and not to restoring data.
225 225 *
226 226 * There are a few different variants of the xsave and xrstor instruction. They
227 227 * are:
228 228 *
229 229 * o xsave This is the original save instruction. It will save all of the
230 230 * requested data in the xsave state structure. It only saves data
231 231 * in the uncompressed (xcomp_bv[63] is zero) format. It may be
232 232 * executed at all privilege levels.
233 233 *
234 234 * o xrstor This is the original restore instruction. It will restore all of
235 235 * the requested data. The xrstor function can handle both the
236 236 * compressed and uncompressed formats. It may be executed at all
237 237 * privilege levels.
238 238 *
239 239 * o xsaveopt This is a variant of the xsave instruction that employs
240 240 * optimizations to try and only write out state that has been
241 241 * modified since the last time an xrstor instruction was called.
242 242 * The processor tracks a tuple of information about the last
243 243 * xrstor and tries to ensure that the same buffer is being used
244 244 * when this optimization is being used. However, because of the
245 245 * way that it tracks the xrstor buffer based on the address of it,
246 246 * it is not suitable for use if that buffer can be easily reused.
247 247 * The most common case is trying to save data to the stack in
248 248 * rtld. It may be executed at all privilege levels.
249 249 *
250 250 * o xsavec This is a variant of the xsave instruction that writes out the
251 251 * compressed form of the xsave_state. Otherwise it behaves as
252 252 * xsave. It may be executed at all privilege levels.
253 253 *
254 254 * o xsaves This is a variant of the xsave instruction. It is similar to
255 255 * xsavec in that it always writes the compressed form of the
256 256 * buffer. Unlike all the other forms, this instruction looks at
257 257 * both the user (%xcr0) and supervisor (IA32_XSS MSR) to determine
258 258 * what to save and restore. xsaves also implements the same
259 259 * optimization that xsaveopt does around modified pieces. User
260 260 * land may not execute the instruction.
261 261 *
262 262 * o xrstors This is a variant of the xrstor instruction. Similar to xsaves
263 263 * it can save and restore both the user and privileged states.
264 264 * Unlike xrstor it can only operate on the compressed form.
265 265 * User land may not execute the instruction.
266 266 *
267 267 * Based on all of these, the kernel has a precedence for what it will use.
268 268 * Basically, xsaves (not supported) is preferred to xsaveopt, which is
269 269 * preferred to xsave. A similar scheme is used when informing rtld (more later)
270 270 * about what it should use. xsavec is preferred to xsave. xsaveopt is not
271 271 * recommended due to the modified optimization not being appropriate for this
272 272 * use.
273 273 *
274 274 * Finally, there is one last gotcha with the xsave state. Importantly some AMD
275 275 * processors did not always save and restore some of the FPU exception state in
276 276 * some cases like Intel did. In those cases the OS will make up for this fact
277 277 * itself.
278 278 *
279 279 * FPU Initialization
280 280 * ------------------
281 281 *
282 282 * One difference with the FPU registers is that not all threads have FPU state,
283 283 * only those that have an lwp. Generally this means kernel threads, which all
284 284 * share p0 and its lwp, do not have FPU state. Though there are definitely
285 285 * exceptions such as kcfpoold. In the rest of this discussion we'll use thread
286 286 * and lwp interchangeably, just think of thread meaning a thread that has a
287 287 * lwp.
288 288 *
289 289 * Each lwp has its FPU state allocated in its pcb (process control block). The
290 290 * actual storage comes from the fpsave_cachep kmem cache. This cache is sized
291 291 * dynamically at start up based on the save mechanism that we're using and the
292 292 * amount of memory required for it. This is dynamic because the xsave_state
293 293 * size varies based on the supported feature set.
294 294 *
295 295 * The hardware side of the FPU is initialized early in boot before we mount the
296 296 * root file system. This is effectively done in fpu_probe(). This is where we
297 297 * make the final decision about what the save and restore mechanisms we should
298 298 * use are, create the fpsave_cachep kmem cache, and initialize a number of
299 299 * function pointers that use save and restoring logic.
300 300 *
301 301 * The thread/lwp side is a little more involved. There are two different
302 302 * things that we need to concern ourselves with. The first is how the FPU
303 303 * resources are allocated and the second is how the FPU state is initialized
304 304 * for a given lwp.
305 305 *
306 306 * We allocate the FPU save state from our kmem cache as part of lwp_fp_init().
307 307 * This is always called unconditionally by the system as part of creating an
308 308 * LWP.
309 309 *
310 310 * There are three different initialization paths that we deal with. The first
311 311 * is when we are executing a new process. As part of exec all of the register
312 312 * state is reset. The exec case is particularly important because init is born
313 313 * like Athena, sprouting from the head of the kernel, without any true parent
314 314 * to fork from. The second is used whenever we fork or create a new lwp. The
315 315 * third is to deal with special lwps like the agent lwp.
316 316 *
317 317 * During exec, we will call fp_exec() which will initialize and set up the FPU
318 318 * state for the process. That will fill in the initial state for the FPU and
319 319 * also set that state in the FPU itself. As part of fp_exec() we also install a
320 320 * thread context operations vector that takes care of dealing with the saving
321 321 * and restoring of the FPU. These context handlers will also be called whenever
322 322 * an lwp is created or forked. In those cases, to initialize the FPU we will
323 323 * call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install a context
324 324 * operations vector for the new thread.
325 325 *
326 326 * Next we'll end up in the context operation fp_new_lwp(). This saves the
327 327 * current thread's state, initializes the new thread's state, and copies over
328 328 * the relevant parts of the originating thread's state. It's as this point that
329 329 * we also install the FPU context operations into the new thread, which ensures
330 330 * that all future threads that are descendants of the current one get the
331 331 * thread context operations (unless they call exec).
332 332 *
333 333 * To deal with some things like the agent lwp, we double check the state of the
334 334 * FPU in sys_rtt_common() to make sure that it has been enabled before
335 335 * returning to user land. In general, this path should be rare, but it's useful
336 336 * for the odd lwp here and there.
337 337 *
338 338 * The FPU state will remain valid most of the time. There are times that
339 339 * the state will be rewritten. For example in restorecontext, due to /proc, or
340 340 * the lwp calls exec(). Whether the context is being freed or we are resetting
341 341 * the state, we will call fp_free() to disable the FPU and our context.
342 342 *
343 343 * Finally, when the lwp is destroyed, it will actually destroy and free the FPU
344 344 * state by calling fp_lwp_cleanup().
345 345 *
346 346 * Kernel FPU Multiplexing
347 347 * -----------------------
348 348 *
349 349 * Just as the kernel has to maintain all of the general purpose registers when
350 350 * switching between scheduled threads, the same is true of the FPU registers.
351 351 *
352 352 * When a thread has FPU state, it also has a set of context operations
353 353 * installed. These context operations take care of making sure that the FPU is
354 354 * properly saved and restored during a context switch (fpsave_ctxt and
355 355 * fprestore_ctxt respectively). This means that the current implementation of
356 356 * the FPU is 'eager', when a thread is running the CPU will have its FPU state
357 357 * loaded. While this is always true when executing in userland, there are a few
358 358 * cases where this is not true in the kernel.
359 359 *
360 360 * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was
361 361 * employed. This meant that the FPU would be saved on a context switch and the
362 362 * CR0.TS bit would be set. When a thread next tried to use the FPU, it would
363 363 * then take a #NM trap, at which point we would restore the FPU from the save
364 364 * area and return to user land. Given the frequency of use of the FPU alone by
365 365 * libc, there's no point returning to user land just to trap again.
366 366 *
367 367 * There are a few cases though where the FPU state may need to be changed for a
368 368 * thread on its behalf. The most notable cases are in the case of processes
369 369 * using /proc, restorecontext, forking, etc. In all of these cases the kernel
370 370 * will force a threads FPU state to be saved into the PCB through the fp_save()
371 371 * function. Whenever the FPU is saved, then the FPU_VALID flag is set on the
372 372 * pcb. This indicates that the save state holds currently valid data. As a side
373 373 * effect of this, CR0.TS will be set. To make sure that all of the state is
374 374 * updated before returning to user land, in these cases, we set a flag on the
375 375 * PCB that says the FPU needs to be updated. This will make sure that we take
376 376 * the slow path out of a system call to fix things up for the thread. Due to
377 377 * the fact that this is a rather rare case, effectively setting the equivalent
378 378 * of t_postsys is acceptable.
379 379 *
380 380 * CR0.TS will be set after a save occurs and cleared when a restore occurs.
381 381 * Generally this means it will be cleared immediately by the new thread that is
382 382 * running in a context switch. However, this isn't the case for kernel threads.
383 383 * They currently operate with CR0.TS set as no kernel state is restored for
384 384 * them. This means that using the FPU will cause a #NM and panic.
385 385 *
386 386 * The FPU_VALID flag on the currently executing thread's pcb is meant to track
387 387 * what the value of CR0.TS should be. If it is set, then CR0.TS will be set.
388 388 * However, because we eagerly restore, the only time that CR0.TS should be set
389 389 * for a non-kernel thread is during operations where it will be cleared before
390 390 * returning to user land and importantly, the only data that is in it is its
391 391 * own.
392 392 *
393 393 * Kernel FPU Usage
394 394 * ----------------
395 395 *
396 396 * Traditionally the kernel never used the FPU since it had no need for
397 397 * floating point operations. However, modern FPU hardware supports a variety
398 398 * of SIMD extensions which can speed up code such as parity calculations or
399 399 * encryption.
400 400 *
401 401 * To allow the kernel to take advantage of these features, the
402 402 * kernel_fpu_begin() and kernel_fpu_end() functions should be wrapped
403 403 * around any usage of the FPU by the kernel to ensure that user-level context
404 404 * is properly saved/restored, as well as to properly setup the FPU for use by
405 405 * the kernel. There are a variety of ways this wrapping can be used, as
406 406 * discussed in this section below.
407 407 *
408 408 * When kernel_fpu_begin() and kernel_fpu_end() are used for extended
409 409 * operations, the kernel_fpu_alloc() function should be used to allocate a
410 410 * kfpu_state_t structure that is used to save/restore the thread's kernel FPU
411 411 * state. This structure is not tied to any thread. That is, different threads
412 412 * can reuse the same kfpu_state_t structure, although not concurrently. A
413 413 * kfpu_state_t structure is freed by the kernel_fpu_free() function.
414 414 *
415 415 * In some cases, the kernel may need to use the FPU for a short operation
416 416 * without the overhead to manage a kfpu_state_t structure and without
417 417 * allowing for a context switch off the FPU. In this case the KFPU_NO_STATE
418 418 * bit can be set in the kernel_fpu_begin() and kernel_fpu_end() flags
419 419 * parameter. This indicates that there is no kfpu_state_t. When used this way,
420 420 * kernel preemption should be disabled by the caller (kpreempt_disable) before
421 421 * calling kernel_fpu_begin(), and re-enabled after calling kernel_fpu_end().
422 422 * For this usage, it is important to limit the kernel's FPU use to short
423 423 * operations. The tradeoff between using the FPU without a kfpu_state_t
424 424 * structure vs. the overhead of allowing a context switch while using the FPU
425 425 * should be carefully considered on a case by case basis.
426 426 *
427 427 * In other cases, kernel threads have an LWP, but never execute in user space.
428 428 * In this situation, the LWP's pcb_fpu area can be used to save/restore the
429 429 * kernel's FPU state if the thread is context switched, instead of having to
430 430 * allocate and manage a kfpu_state_t structure. The KFPU_USE_LWP bit in the
431 431 * kernel_fpu_begin() and kernel_fpu_end() flags parameter is used to
432 432 * enable this behavior. It is the caller's responsibility to ensure that this
433 433 * is only used for a kernel thread which never executes in user space.
434 434 *
435 435 * FPU Exceptions
436 436 * --------------
437 437 *
438 438 * Certain operations can cause the kernel to take traps due to FPU activity.
439 439 * Generally these events will cause a user process to receive a SIGFPE and if
440 440 * the kernel receives it in kernel context, we will die. Traditionally the #NM
441 441 * (Device Not Available / No Math) exception generated by CR0.TS would have
442 442 * caused us to restore the FPU. Now it is a fatal event regardless of whether
443 443 * or not user land causes it.
444 444 *
445 445 * While there are some cases where the kernel uses the FPU, it is up to the
446 446 * kernel to use the FPU in a way such that it cannot receive a trap or to use
447 447 * the appropriate trap protection mechanisms.
448 448 *
449 449 * Hypervisors
450 450 * -----------
451 451 *
452 452 * When providing support for hypervisors things are a little bit more
453 453 * complicated because the FPU is not virtualized at all. This means that they
454 454 * need to save and restore the FPU and %xcr0 across entry and exit to the
455 455 * guest. To facilitate this, we provide a series of APIs in <sys/hma.h>. These
456 456 * allow us to use the full native state to make sure that we are always saving
457 457 * and restoring the full FPU that the host sees, even when the guest is using a
458 458 * subset.
459 459 *
460 460 * One tricky aspect of this is that the guest may be using a subset of %xcr0
461 461 * and therefore changing our %xcr0 on the fly. It is vital that when we're
462 462 * saving and restoring the FPU that we always use the largest %xcr0 contents
463 463 * otherwise we will end up leaving behind data in it.
464 464 *
465 465 * ELF PLT Support
466 466 * ---------------
467 467 *
468 468 * rtld has to preserve a subset of the FPU when it is saving and restoring
469 469 * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for
470 470 * more information. As a result, we set up an aux vector that contains
471 471 * information about what save and restore mechanisms it should be using and
472 472 * the sizing thereof based on what the kernel supports. This is passed down in
473 473 * a series of aux vectors SUN_AT_FPTYPE and SUN_AT_FPSIZE. This information is
474 474 * initialized in fpu_subr.c.
475 475 */
476 476
477 477 kmem_cache_t *fpsave_cachep;
478 478
479 479 /* Legacy fxsave layout + xsave header + ymm */
480 480 #define AVX_XSAVE_SIZE (512 + 64 + 256)
481 481
482 482 /*
483 483 * Various sanity checks.
484 484 */
485 485 CTASSERT(sizeof (struct fxsave_state) == 512);
486 486 CTASSERT(sizeof (struct fnsave_state) == 108);
487 487 CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
488 488 CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
489 489
490 490 /*
491 491 * This structure is the x86 implementation of the kernel FPU that is defined in
492 492 * uts/common/sys/kfpu.h.
493 493 */
494 494
495 495 typedef enum kfpu_flags {
496 496 /*
497 497 * This indicates that the save state has initial FPU data.
498 498 */
499 499 KFPU_F_INITIALIZED = 0x01
500 500 } kfpu_flags_t;
501 501
502 502 struct kfpu_state {
503 503 	fpu_ctx_t	kfpu_ctx;	/* kernel FPU save area + flags */
504 504 	kfpu_flags_t	kfpu_flags;	/* KFPU_F_* flags (see above) */
505 505 	kthread_t	*kfpu_curthread; /* NOTE(review): presumably the */
506 506 };				/* thread currently using this state */
507 507
508 508 /*
509 509 * Initial kfpu state for SSE/SSE2 used by fpinit()
510 510 */
511 511 const struct fxsave_state sse_initial = {
512 512 FPU_CW_INIT, /* fx_fcw */
513 513 0, /* fx_fsw */
514 514 0, /* fx_fctw */
515 515 0, /* fx_fop */
516 516 #if defined(__amd64)
517 517 0, /* fx_rip */
518 518 0, /* fx_rdp */
519 519 #else
520 520 0, /* fx_eip */
521 521 0, /* fx_cs */
522 522 0, /* __fx_ign0 */
523 523 0, /* fx_dp */
524 524 0, /* fx_ds */
525 525 0, /* __fx_ign1 */
526 526 #endif /* __amd64 */
527 527 SSE_MXCSR_INIT /* fx_mxcsr */
528 528 /* rest of structure is zero */
529 529 };
530 530
531 531 /*
532 532 * Initial kfpu state for AVX used by fpinit()
533 533 */
534 534 const struct xsave_state avx_initial = {
535 535 /*
536 536 * The definition below needs to be identical with sse_initial
537 537 * defined above.
538 538 */
539 539 {
540 540 FPU_CW_INIT, /* fx_fcw */
541 541 0, /* fx_fsw */
542 542 0, /* fx_fctw */
543 543 0, /* fx_fop */
544 544 #if defined(__amd64)
545 545 0, /* fx_rip */
546 546 0, /* fx_rdp */
547 547 #else
548 548 0, /* fx_eip */
549 549 0, /* fx_cs */
550 550 0, /* __fx_ign0 */
551 551 0, /* fx_dp */
552 552 0, /* fx_ds */
553 553 0, /* __fx_ign1 */
554 554 #endif /* __amd64 */
555 555 SSE_MXCSR_INIT /* fx_mxcsr */
556 556 /* rest of structure is zero */
557 557 },
558 558 /*
559 559 * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid,
560 560 * and CPU should initialize XMM/YMM.
561 561 */
562 562 1,
563 563 0 /* xs_xcomp_bv */
564 564 /* rest of structure is zero */
565 565 };
566 566
567 567 /*
568 568 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
569 569 * the #gp exception caused by setting unsupported bits in the
570 570 * MXCSR register
571 571 */
572 572 uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;
573 573
574 574 /*
575 575 * Initial kfpu state for x87 used by fpinit()
576 576 */
577 577 const struct fnsave_state x87_initial = {
578 578 FPU_CW_INIT, /* f_fcw */
579 579 0, /* __f_ign0 */
580 580 0, /* f_fsw */
581 581 0, /* __f_ign1 */
582 582 0xffff, /* f_ftw */
583 583 /* rest of structure is zero */
584 584 };
585 585
586 586 /*
587 587 * This vector is patched to xsave_ctxt() or xsaveopt_ctxt() if we discover we
588 588 * have an XSAVE-capable chip in fpu_probe.
589 589 */
590 590 void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
591 591 void (*fprestore_ctxt)(void *) = fpxrestore_ctxt;
592 592
593 593 /*
594 594 * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable.
595 595 */
596 596 void (*xsavep)(struct xsave_state *, uint64_t) = xsave;
597 597
598 598 static int fpe_sicode(uint_t);
599 599 static int fpe_simd_sicode(uint_t);
600 600
601 601 /*
602 602 * Copy the state of parent lwp's floating point context into the new lwp.
603 603 * Invoked for both fork() and lwp_create().
604 604 *
605 605 * Note that we inherit -only- the control state (e.g. exception masks,
606 606 * rounding, precision control, etc.); the FPU registers are otherwise
607 607 * reset to their initial state.
608 608 */
609 609 static void
610 610 fp_new_lwp(kthread_id_t t, kthread_id_t ct)
611 611 {
612 612 	struct fpu_ctx *fp;		/* parent fpu context */
613 613 	struct fpu_ctx *cfp;		/* new fpu context */
614 614 	struct fxsave_state *fx, *cfx;
615 615 	struct xsave_state *cxs;
616 616
617 617 	ASSERT(fp_kind != FP_NO);
618 618
619 619 	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
620 620 	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;
621 621
622 622 	/*
623 623 	 * If the parent FPU state is still live in the FPU hw then save it
624 624 	 * into the parent's pcb; fp_save() already does this for us nicely.
625 625 	 */
626 626 	fp_save(fp);
627 627
628 628 	cfp->fpu_flags = FPU_EN | FPU_VALID;
629 629 	cfp->fpu_regs.kfpu_status = 0;
630 630 	cfp->fpu_regs.kfpu_xstatus = 0;
631 631
632 632 	/*
633 633 	 * Flag the child's pcb so that we take the slow path back to user
634 634 	 * land and load this newly initialized FPU state there.
635 635 	 */
636 636 	PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb);
637 637
638 638 	switch (fp_save_mech) {
639 639 	case FP_FXSAVE:
640 640 		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
641 641 		cfx = cfp->fpu_regs.kfpu_u.kfpu_fx;
642 642 		bcopy(&sse_initial, cfx, sizeof (*cfx));
643 643 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
644 644 		cfx->fx_fcw = fx->fx_fcw;
645 645 		break;
646 646
647 647 	case FP_XSAVE:
648 648 		cfp->fpu_xsave_mask = fp->fpu_xsave_mask;
649 649
650 650 		VERIFY(fp->fpu_regs.kfpu_u.kfpu_xs != NULL);
651 651
652 652 		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
653 653 		cxs = cfp->fpu_regs.kfpu_u.kfpu_xs;
654 654 		cfx = &cxs->xs_fxsave;
655 655
656 656 		bcopy(&avx_initial, cxs, sizeof (*cxs));
657 657 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
658 658 		cfx->fx_fcw = fx->fx_fcw;
659 659 		cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
660 660 		    XFEATURE_FP_INITIAL);
661 661 		break;
662 662 	default:
|
↓ open down ↓ |
629 lines elided |
↑ open up ↑ |
663 663 		panic("Invalid fp_save_mech");
664 664 		/*NOTREACHED*/
665 665 	}
666 666
667 667 	/*
668 668 	 * Install the FPU context ops on the child so its state is saved and
669 669 	 * restored across context switches and inherited by its own lwps.
670 670 	 */
671 671
672 672 	installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
673 673 - 	    fp_new_lwp, NULL, fp_free);
673 673 + 	    fp_new_lwp, NULL, fp_free, NULL);
674 674 }
675 675
/*
 * Free any state associated with floating point context.
 * Fp_free can be called in three cases:
 *	1) from reaper -> thread_free -> freectx-> fp_free
 *		fp context belongs to a thread on deathrow
 *		nothing to do, thread will never be resumed
 *		thread calling ctxfree is reaper
 *
 *	2) from exec -> freectx -> fp_free
 *		fp context belongs to the current thread
 *		must disable fpu, thread calling ctxfree is curthread
 *
 *	3) from restorecontext -> setfpregs -> fp_free
 *		we have a modified context in the memory (lwp->pcb_fpu)
 *		disable fpu and release the fp context for the CPU
 *
 */
/*ARGSUSED*/
void
fp_free(struct fpu_ctx *fp, int isexec)
{
	ASSERT(fp_kind != FP_NO);

	/* State already saved to memory (case 1): nothing further to do. */
	if (fp->fpu_flags & FPU_VALID)
		return;

	kpreempt_disable();
	/*
	 * We want to do fpsave rather than fpdisable so that we can
	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
	 */
	fp->fpu_flags |= FPU_VALID;
	/* If for current thread disable FP to track FPU_VALID */
	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
		/* Clear errors if any to prevent frstor from complaining */
		(void) fperr_reset();
		if (fp_kind & __FP_SSE)
			(void) fpxerr_reset();
		fpdisable();
	}
	kpreempt_enable();
}
718 718
/*
 * Store the floating point state and disable the floating point unit.
 */
void
fp_save(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	kpreempt_disable();
	/* Nothing to do with no context, or one that is already saved. */
	if (!fp || fp->fpu_flags & FPU_VALID ||
	    (fp->fpu_flags & FPU_EN) == 0) {
		kpreempt_enable();
		return;
	}
	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx);
		break;

	case FP_XSAVE:
		xsavep(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_flags |= FPU_VALID;

	/*
	 * We save the FPU as part of forking, execing, modifications via /proc,
	 * restorecontext, etc. As such, we need to make sure that we return to
	 * userland with valid state in the FPU. If we're context switched out
	 * before we hit sys_rtt_common() we'll end up having restored the FPU
	 * as part of the context ops operations. The restore logic always makes
	 * sure that FPU_VALID is set before doing a restore so we don't restore
	 * it a second time.
	 */
	PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb);

	kpreempt_enable();
}
763 763
764 764 /*
765 765 * Restore the FPU context for the thread:
766 766 * The possibilities are:
767 767 * 1. No active FPU context: Load the new context into the FPU hw
768 768 * and enable the FPU.
769 769 */
770 770 void
771 771 fp_restore(struct fpu_ctx *fp)
772 772 {
773 773 switch (fp_save_mech) {
774 774 case FP_FXSAVE:
775 775 fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx);
776 776 break;
777 777
778 778 case FP_XSAVE:
779 779 xrestore(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
780 780 break;
781 781 default:
782 782 panic("Invalid fp_save_mech");
783 783 /*NOTREACHED*/
784 784 }
785 785
786 786 fp->fpu_flags &= ~FPU_VALID;
787 787 }
788 788
|
↓ open down ↓ |
105 lines elided |
↑ open up ↑ |
/*
 * Reset the FPU such that it is in a valid state for a new thread that is
 * coming out of exec. The FPU will be in a usable state at this point. At this
 * point we know that the FPU state has already been allocated and if this
 * wasn't an init process, then it will have had fp_free() previously called.
 */
void
fp_exec(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
	/*
	 * Pre-allocate the context op here, outside the kpreempt_disable()
	 * region below, because installctx() otherwise performs a sleeping
	 * allocation.
	 */
	struct ctxop *ctx = installctx_preallocate();

	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	/*
	 * Make sure that we're not preempted in the middle of initializing the
	 * FPU on CPU.
	 */
	kpreempt_disable();
	installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
	    fp_new_lwp, NULL, fp_free, ctx);
	fpinit();
	fp->fpu_flags = FPU_EN;
	kpreempt_enable();
}
815 816
816 817
/*
 * Seeds the initial state for the current thread. The possibilities are:
 *      1. Another process has modified the FPU state before we have done any
 *         initialization: Load the FPU state from the LWP state.
 *      2. The FPU state has not been externally modified: Load a clean state.
 */
void
fp_seed(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	/* Caller must hold off preemption; we manipulate live FPU state. */
	ASSERT(curthread->t_preempt >= 1);
	ASSERT((fp->fpu_flags & FPU_EN) == 0);

	/*
	 * Always initialize a new context and initialize the hardware.
	 */
	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
	    fp_new_lwp, NULL, fp_free, NULL);
	fpinit();

	/*
	 * If FPU_VALID is set, it means someone has modified registers via
	 * /proc. In this case, restore the current lwp's state.
	 */
	if (fp->fpu_flags & FPU_VALID)
		fp_restore(fp);

	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
	fp->fpu_flags = FPU_EN;
}
852 853
/*
 * When using xsave/xrstor, these three functions are used by the lwp code to
 * manage the memory for the xsave area.
 */
void
fp_lwp_init(struct _klwp *lwp)
{
	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;

	/*
	 * We keep a copy of the pointer in lwp_fpu so that we can restore the
	 * value in forklwp() after we duplicate the parent's LWP state.
	 */
	lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);

	if (fp_save_mech == FP_XSAVE) {
		/*
		 * We bzero since the fpinit() code path will only
		 * partially initialize the xsave area using avx_inital.
		 */
		ASSERT(cpuid_get_xsave_size() >= sizeof (struct xsave_state));
		bzero(fp->fpu_regs.kfpu_u.kfpu_xs, cpuid_get_xsave_size());
	}
}
879 880
880 881 void
881 882 fp_lwp_cleanup(struct _klwp *lwp)
882 883 {
883 884 struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;
884 885
885 886 if (fp->fpu_regs.kfpu_u.kfpu_generic != NULL) {
886 887 kmem_cache_free(fpsave_cachep,
887 888 fp->fpu_regs.kfpu_u.kfpu_generic);
888 889 lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic = NULL;
889 890 }
890 891 }
891 892
/*
 * Called during the process of forklwp(). The kfpu_u pointer will have been
 * overwritten while copying the parent's LWP structure. We have a valid copy
 * stashed in the child's lwp_fpu which we use to restore the correct value.
 */
void
fp_lwp_dup(struct _klwp *lwp)
{
	void *xp = lwp->lwp_fpu;
	size_t sz;

	/* The size of the save area depends on the active save mechanism. */
	switch (fp_save_mech) {
	case FP_FXSAVE:
		sz = sizeof (struct fxsave_state);
		break;
	case FP_XSAVE:
		sz = cpuid_get_xsave_size();
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	/* copy the parent's values into the new lwp's struct */
	bcopy(lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic, xp, sz);
	/* now restore the pointer */
	lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp;
}
920 921
/*
 * Handle a processor extension error fault
 * Returns non zero for error.
 */

/*ARGSUSED*/
int
fpexterrflt(struct regs *rp)
{
	uint32_t fpcw, fpsw;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind != FP_NO);

	/*
	 * Now we can enable the interrupts.
	 * (NOTE: x87 fp exceptions come thru interrupt gate)
	 */
	sti();

	if (!fpu_exists)
		return (FPE_FLTINV);

	/*
	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread). If it's not dirty (it may not be, due to
	 * an intervening save due to a context switch between the sti(),
	 * above and here, then it's safe to just use the stored values in
	 * the context save area to determine the cause of the fault.
	 */
	fp_save(fp);

	/* clear exception flags in saved state, as if by fnclex */
	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw;
		fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw &= ~FPS_SW_EFLAGS;
		break;

	case FP_XSAVE:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fcw;
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
		/*
		 * Always set LEGACY_FP as it may have been cleared by XSAVE
		 * instruction
		 */
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_regs.kfpu_status = fpsw;

	if ((fpsw & FPS_ES) == 0)
		return (0);	/* No exception */

	/*
	 * "and" the exception flags with the complement of the mask
	 * bits to determine which exception occurred
	 */
	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
}
988 989
/*
 * Handle an SSE/SSE2 precise exception.
 * Returns a non-zero sicode for error.
 */
/*ARGSUSED*/
int
fpsimderrflt(struct regs *rp)
{
	uint32_t mxcsr, xmask;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind & __FP_SSE);

	/*
	 * NOTE: Interrupts are disabled during execution of this
	 * function.  They are enabled by the caller in trap.c.
	 */

	/*
	 * The only way we could have gotten here if there is no FP unit
	 * is via a user executing an INT $19 instruction, so there is
	 * no fault in that case.
	 */
	if (!fpu_exists)
		return (0);

	/*
	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread). If it's not dirty, then it's safe to just use
	 * the stored values in the context save area to determine the
	 * cause of the fault.
	 */
	fp_save(fp);		/* save the FPU state */

	/* Pull mxcsr and the x87 status word out of the saved image. */
	if (fp_save_mech == FP_XSAVE) {
		mxcsr = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_mxcsr;
		fp->fpu_regs.kfpu_status =
		    fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
	} else {
		mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx->fx_mxcsr;
		fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
	}
	fp->fpu_regs.kfpu_xstatus = mxcsr;

	/*
	 * compute the mask that determines which conditions can cause
	 * a #xm exception, and use this to clean the status bits so that
	 * we can identify the true cause of this one.
	 */
	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
}
1042 1043
1043 1044 /*
1044 1045 * In the unlikely event that someone is relying on this subcode being
1045 1046 * FPE_FLTILL for denormalize exceptions, it can always be patched back
1046 1047 * again to restore old behaviour.
1047 1048 */
1048 1049 int fpe_fltden = FPE_FLTDEN;
1049 1050
1050 1051 /*
1051 1052 * Map from the FPU status word to the FP exception si_code.
1052 1053 */
1053 1054 static int
1054 1055 fpe_sicode(uint_t sw)
1055 1056 {
1056 1057 if (sw & FPS_IE)
1057 1058 return (FPE_FLTINV);
1058 1059 if (sw & FPS_ZE)
1059 1060 return (FPE_FLTDIV);
1060 1061 if (sw & FPS_DE)
1061 1062 return (fpe_fltden);
1062 1063 if (sw & FPS_OE)
1063 1064 return (FPE_FLTOVF);
1064 1065 if (sw & FPS_UE)
1065 1066 return (FPE_FLTUND);
1066 1067 if (sw & FPS_PE)
1067 1068 return (FPE_FLTRES);
1068 1069 return (FPE_FLTINV); /* default si_code for other exceptions */
1069 1070 }
1070 1071
1071 1072 /*
1072 1073 * Map from the SSE status word to the FP exception si_code.
1073 1074 */
1074 1075 static int
1075 1076 fpe_simd_sicode(uint_t sw)
1076 1077 {
1077 1078 if (sw & SSE_IE)
1078 1079 return (FPE_FLTINV);
1079 1080 if (sw & SSE_ZE)
1080 1081 return (FPE_FLTDIV);
1081 1082 if (sw & SSE_DE)
1082 1083 return (FPE_FLTDEN);
1083 1084 if (sw & SSE_OE)
1084 1085 return (FPE_FLTOVF);
1085 1086 if (sw & SSE_UE)
1086 1087 return (FPE_FLTUND);
1087 1088 if (sw & SSE_PE)
1088 1089 return (FPE_FLTRES);
1089 1090 return (FPE_FLTINV); /* default si_code for other exceptions */
1090 1091 }
1091 1092
/*
 * This routine is invoked as part of libc's __fpstart implementation
 * via sysi86(2).
 *
 * It may be called -before- any context has been assigned in which case
 * we try and avoid touching the hardware. Or it may be invoked well
 * after the context has been assigned and fiddled with, in which case
 * just tweak it directly.
 */
void
fpsetcw(uint16_t fcw, uint32_t mxcsr)
{
	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
	struct fxsave_state *fx;

	if (!fpu_exists || fp_kind == FP_NO)
		return;

	if ((fp->fpu_flags & FPU_EN) == 0) {
		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
			/*
			 * Common case.  Floating point unit not yet
			 * enabled, and kernel already intends to initialize
			 * the hardware the way the caller wants.
			 */
			return;
		}
		/*
		 * Hmm.  Userland wants a different default.
		 * Do a fake "first trap" to establish the context, then
		 * handle as if we already had a context before we came in.
		 */
		kpreempt_disable();
		fp_seed();
		kpreempt_enable();
	}

	/*
	 * Ensure that the current hardware state is flushed back to the
	 * pcb, then modify that copy.  Next use of the fp will
	 * restore the context.
	 */
	fp_save(fp);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
		fx->fx_fcw = fcw;
		/* Mask off bits the hardware doesn't support (avoids #gp). */
		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
		break;

	case FP_XSAVE:
		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
		fx->fx_fcw = fcw;
		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
		/*
		 * Always set LEGACY_FP as it may have been cleared by XSAVE
		 * instruction
		 */
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}
}
1158 1159
/*
 * Reset a kernel FPU state to the canonical initial register image for the
 * active save mechanism and mark it initialized.
 */
static void
kernel_fpu_fpstate_init(kfpu_state_t *kfpu)
{
	struct xsave_state *xs;

	switch (fp_save_mech) {
	case FP_FXSAVE:
		bcopy(&sse_initial, kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_fx,
		    sizeof (struct fxsave_state));
		kfpu->kfpu_ctx.fpu_xsave_mask = 0;
		break;
	case FP_XSAVE:
		xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs;
		bzero(xs, cpuid_get_xsave_size());
		bcopy(&avx_initial, xs, sizeof (*xs));
		xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL;
		break;
	default:
		panic("invalid fp_save_mech");
	}

	/*
	 * Set the corresponding flags that the system expects on the FPU state
	 * to indicate that this is our state. The FPU_EN flag is required to
	 * indicate that FPU usage is allowed. The FPU_KERN flag is explicitly
	 * not set below as it represents that this state is being suppressed
	 * by the kernel.
	 */
	kfpu->kfpu_ctx.fpu_flags = FPU_EN | FPU_VALID;
	kfpu->kfpu_flags |= KFPU_F_INITIALIZED;
}
1191 1192
1192 1193 kfpu_state_t *
1193 1194 kernel_fpu_alloc(int kmflags)
1194 1195 {
1195 1196 kfpu_state_t *kfpu;
1196 1197
1197 1198 if ((kfpu = kmem_zalloc(sizeof (kfpu_state_t), kmflags)) == NULL) {
1198 1199 return (NULL);
1199 1200 }
1200 1201
1201 1202 kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic =
1202 1203 kmem_cache_alloc(fpsave_cachep, kmflags);
1203 1204 if (kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic == NULL) {
1204 1205 kmem_free(kfpu, sizeof (kfpu_state_t));
1205 1206 return (NULL);
1206 1207 }
1207 1208
1208 1209 kernel_fpu_fpstate_init(kfpu);
1209 1210
1210 1211 return (kfpu);
1211 1212 }
1212 1213
/*
 * Release a kernel FPU state previously obtained from kernel_fpu_alloc().
 */
void
kernel_fpu_free(kfpu_state_t *kfpu)
{
	kmem_cache_free(fpsave_cachep,
	    kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic);
	kmem_free(kfpu, sizeof (kfpu_state_t));
}
1220 1221
/*
 * Context-switch save handler for kernel FPU usage: capture the live FPU
 * registers into the kernel FPU state (or the lwp pcb for SSYS threads)
 * before this thread goes off-CPU.
 */
static void
kernel_fpu_ctx_save(void *arg)
{
	kfpu_state_t *kfpu = arg;
	fpu_ctx_t *pf;

	if (kfpu == NULL) {
		/*
		 * A NULL kfpu implies this is a kernel thread with an LWP and
		 * no user-level FPU usage. Use the lwp fpu save area.
		 */
		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;

		ASSERT(curthread->t_procp->p_flag & SSYS);
		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);

		fp_save(pf);
	} else {
		pf = &kfpu->kfpu_ctx;

		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);

		/*
		 * Note, we can't use fp_save because it assumes that we're
		 * saving to the thread's PCB and not somewhere else. Because
		 * this is a different FPU context, we instead have to do this
		 * ourselves.
		 */
		switch (fp_save_mech) {
		case FP_FXSAVE:
			fpxsave(pf->fpu_regs.kfpu_u.kfpu_fx);
			break;
		case FP_XSAVE:
			xsavep(pf->fpu_regs.kfpu_u.kfpu_xs, pf->fpu_xsave_mask);
			break;
		default:
			panic("Invalid fp_save_mech");
		}

		/*
		 * Because we have saved context here, our save state is no
		 * longer valid and therefore needs to be reinitialized.
		 */
		kfpu->kfpu_flags &= ~KFPU_F_INITIALIZED;
	}

	pf->fpu_flags |= FPU_VALID;

	/*
	 * Clear KFPU flag. This allows swtch to check for improper kernel
	 * usage of the FPU (i.e. switching to a new thread while the old
	 * thread was in the kernel and using the FPU, but did not perform a
	 * context save).
	 */
	curthread->t_flag &= ~T_KFPU;
}
1278 1279
/*
 * Context-switch restore handler for kernel FPU usage: reload the previously
 * saved kernel FPU state (or the lwp pcb state for SSYS threads) when this
 * thread comes back on-CPU.
 */
static void
kernel_fpu_ctx_restore(void *arg)
{
	kfpu_state_t *kfpu = arg;
	fpu_ctx_t *pf;

	if (kfpu == NULL) {
		/*
		 * A NULL kfpu implies this is a kernel thread with an LWP and
		 * no user-level FPU usage. Use the lwp fpu save area.
		 */
		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;

		ASSERT(curthread->t_procp->p_flag & SSYS);
		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
	} else {
		pf = &kfpu->kfpu_ctx;

		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
	}

	fp_restore(pf);
	curthread->t_flag |= T_KFPU;
}
1304 1305
1305 1306 /*
1306 1307 * Validate that the thread is not switching off-cpu while actively using the
1307 1308 * FPU within the kernel.
1308 1309 */
1309 1310 void
1310 1311 kernel_fpu_no_swtch(void)
|
↓ open down ↓ |
461 lines elided |
↑ open up ↑ |
1311 1312 {
1312 1313 if ((curthread->t_flag & T_KFPU) != 0) {
1313 1314 panic("curthread swtch-ing while the kernel is using the FPU");
1314 1315 }
1315 1316 }
1316 1317
/*
 * Begin a region of kernel FPU usage. Depending on flags, the caller either
 * supplies a kfpu_state_t, borrows the LWP's pcb_fpu (KFPU_USE_LWP), or runs
 * stateless under kpreempt_disable (KFPU_NO_STATE). Must be paired with
 * kernel_fpu_end() with matching flags.
 */
void
kernel_fpu_begin(kfpu_state_t *kfpu, uint_t flags)
{
	klwp_t *pl = curthread->t_lwp;
	struct ctxop *ctx;

	if ((curthread->t_flag & T_KFPU) != 0) {
		panic("curthread attempting to nest kernel FPU states");
	}

	/* KFPU_USE_LWP and KFPU_NO_STATE are mutually exclusive. */
	ASSERT((flags & (KFPU_USE_LWP | KFPU_NO_STATE)) !=
	    (KFPU_USE_LWP | KFPU_NO_STATE));

	if ((flags & KFPU_NO_STATE) == KFPU_NO_STATE) {
		/*
		 * Since we don't have a kfpu_state or usable lwp pcb_fpu to
		 * hold our kernel FPU context, we depend on the caller doing
		 * kpreempt_disable for the duration of our FPU usage. This
		 * should only be done for very short periods of time.
		 */
		ASSERT(curthread->t_preempt > 0);
		ASSERT(kfpu == NULL);

		if (pl != NULL) {
			/*
			 * We might have already saved once so FPU_VALID could
			 * be set. This is handled in fp_save.
			 */
			fp_save(&pl->lwp_pcb.pcb_fpu);
			pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
		}

		curthread->t_flag |= T_KFPU;

		/* Always restore the fpu to the initial state. */
		fpinit();

		return;
	}

	/*
	 * We either have a kfpu, or are using the LWP pcb_fpu for context ops.
	 */

	if ((flags & KFPU_USE_LWP) == 0) {
		if (kfpu->kfpu_curthread != NULL)
			panic("attempting to reuse kernel FPU state at %p when "
			    "another thread already is using", kfpu);

		if ((kfpu->kfpu_flags & KFPU_F_INITIALIZED) == 0)
			kernel_fpu_fpstate_init(kfpu);

		kfpu->kfpu_curthread = curthread;
	}

	/*
	 * Not all threads may have an active LWP. If they do and we're not
	 * going to re-use the LWP, then we should go ahead and save the state.
	 * We must also note that the fpu is now being used by the kernel and
	 * therefore we do not want to manage the fpu state via the user-level
	 * thread's context handlers.
	 *
	 * We might have already saved once (due to a prior use of the kernel
	 * FPU or another code path) so FPU_VALID could be set. This is handled
	 * by fp_save, as is the FPU_EN check.
	 */
	ctx = installctx_preallocate();
	kpreempt_disable();
	if (pl != NULL) {
		if ((flags & KFPU_USE_LWP) == 0)
			fp_save(&pl->lwp_pcb.pcb_fpu);
		pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
	}

	/*
	 * Set the context operations for kernel FPU usage. Note that this is
	 * done with a preallocated buffer and under kpreempt_disable because
	 * without a preallocated buffer, installctx does a sleeping
	 * allocation. We haven't finished initializing our kernel FPU state
	 * yet, and in the rare case that we happen to save/restore just as
	 * installctx() exits its own kpreempt_enable() internal call, we
	 * guard against restoring an uninitialized buffer (0xbaddcafe).
	 */
	installctx(curthread, kfpu, kernel_fpu_ctx_save, kernel_fpu_ctx_restore,
	    NULL, NULL, NULL, NULL, ctx);

	curthread->t_flag |= T_KFPU;

	if ((flags & KFPU_USE_LWP) == KFPU_USE_LWP) {
		/*
		 * For pure kernel threads with an LWP, we can use the LWP's
		 * pcb_fpu to save/restore context.
		 */
		fpu_ctx_t *pf = &pl->lwp_pcb.pcb_fpu;

		VERIFY(curthread->t_procp->p_flag & SSYS);
		VERIFY(kfpu == NULL);
		ASSERT((pf->fpu_flags & FPU_EN) == 0);

		/* Always restore the fpu to the initial state. */
		if (fp_save_mech == FP_XSAVE)
			pf->fpu_xsave_mask = XFEATURE_FP_ALL;
		fpinit();
		pf->fpu_flags = FPU_EN | FPU_KERNEL;
	} else {
		/* initialize the kfpu state */
		kernel_fpu_ctx_restore(kfpu);
	}
	kpreempt_enable();
}
1424 1429
/*
 * End a region of kernel FPU usage begun by kernel_fpu_begin(). The flags
 * must match those passed to kernel_fpu_begin().
 */
void
kernel_fpu_end(kfpu_state_t *kfpu, uint_t flags)
{
	ulong_t iflags;

	if ((curthread->t_flag & T_KFPU) == 0) {
		panic("curthread attempting to clear kernel FPU state "
		    "without using it");
	}

	/*
	 * General comments on why the rest of this function is structured the
	 * way it is. Be aware that there is a lot of subtlety here.
	 *
	 * If a user-level thread ever uses the fpu while in the kernel, then
	 * we cannot call fpdisable since that does STTS. That will set the
	 * ts bit in %cr0 which will cause an exception if anything touches the
	 * fpu. However, the user-level context switch handler (fpsave_ctxt)
	 * needs to access the fpu to save the registers into the pcb.
	 * fpsave_ctxt relies on CLTS having been done to clear the ts bit in
	 * fprestore_ctxt when the thread context switched onto the CPU.
	 *
	 * Calling fpdisable only effects the current CPU's %cr0 register.
	 *
	 * During removectx and kpreempt_enable, we can voluntarily context
	 * switch, so the CPU we were on when we entered this function might
	 * not be the same one we're on when we return from removectx or end
	 * the function. Note there can be user-level context switch handlers
	 * still installed if this is a user-level thread.
	 *
	 * We also must be careful in the unlikely chance we're running in an
	 * interrupt thread, since we can't leave the CPU's %cr0 TS state set
	 * incorrectly for the "real" thread to resume on this CPU.
	 */

	if ((flags & KFPU_NO_STATE) == 0) {
		kpreempt_disable();
	} else {
		ASSERT(curthread->t_preempt > 0);
	}

	curthread->t_flag &= ~T_KFPU;

	/*
	 * When we are ending things, we explicitly don't save the current
	 * kernel FPU state back to the temporary state. The kfpu API is not
	 * intended to be a permanent save location.
	 *
	 * If this is a user-level thread and we were to context switch
	 * before returning to user-land, fpsave_ctxt will be a no-op since we
	 * already saved the user-level FPU state the first time we run
	 * kernel_fpu_begin (i.e. we won't save the bad kernel fpu state over
	 * the user-level fpu state). The fpsave_ctxt functions only save if
	 * FPU_VALID is not already set. fp_save also set PCB_SET_UPDATE_FPU so
	 * fprestore_ctxt will be done in sys_rtt_common when the thread
	 * finally returns to user-land.
	 */

	if ((curthread->t_procp->p_flag & SSYS) != 0 &&
	    curthread->t_intr == NULL) {
		/*
		 * A kernel thread which is not an interrupt thread, so we
		 * STTS now.
		 */
		fpdisable();
	}

	if ((flags & KFPU_NO_STATE) == 0) {
		removectx(curthread, kfpu, kernel_fpu_ctx_save,
		    kernel_fpu_ctx_restore, NULL, NULL, NULL, NULL);

		if (kfpu != NULL) {
			if (kfpu->kfpu_curthread != curthread) {
				panic("attempting to end kernel FPU state "
				    "for %p, but active thread is not "
				    "curthread", kfpu);
			} else {
				kfpu->kfpu_curthread = NULL;
			}
		}

		kpreempt_enable();
	}

	if (curthread->t_lwp != NULL) {
		uint_t f;

		/* Drop the kernel-ownership marker from the lwp's FPU flags. */
		if (flags & KFPU_USE_LWP) {
			f = FPU_EN | FPU_KERNEL;
		} else {
			f = FPU_KERNEL;
		}
		curthread->t_lwp->lwp_pcb.pcb_fpu.fpu_flags &= ~f;
	}
}
|
↓ open down ↓ |
87 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX