/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/vmparam.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/frame.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/psw.h>
#include <sys/ucontext.h>
#include <sys/asm_linkage.h>
#include <sys/errno.h>
#include <sys/archsystm.h>
#include <sys/schedctl.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>

/*
 * This is a wrapper around copyout_noerr() that returns a guaranteed error
 * code. Because we're using copyout_noerr(), we need to bound the time we're
 * under an on_fault()/no_fault() and attempt to do so only while we're
 * actually copying data out. The main reason for this is that we're being
 * called back from the FPU code, which holds us under kpreempt_disable() and
 * related; we can't use a larger on_fault()/no_fault() region, as that would
 * both hide legitimate kernel errors, masquerading them as user issues, and
 * make it trickier to reason about the correct restoration of our state.
 */
static int
savecontext_copyout(const void *kaddr, void *uaddr, size_t size)
{
	label_t ljb;
	if (!on_fault(&ljb)) {
		copyout_noerr(kaddr, uaddr, size);
		no_fault();
		return (0);
	} else {
		no_fault();
		return (EFAULT);
	}
}

/*
 * Save user context.
 *
 * Generally speaking, ucp is a pointer to kernel memory. In the traditional
 * version of this (when flags is 0), we just write and fill out all of the
 * ucontext_t without any care for what was there ahead of this. However, when
 * the state was extended to push additional data when user pointers in the
 * ucontext_t are valid (currently only uc_xsave), we copy that extended state
 * out to the user pointer.
 *
 * We allow the copying to happen in two different ways, mostly because this
 * is also used in the signal handling context, where we must be much more
 * careful about how we copy out data.
 */
int
savecontext(ucontext_t *ucp, const k_sigset_t *mask, savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	long user_xsave = 0;
	int ret;

	VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

	/*
	 * We unconditionally assign to every field through the end
	 * of the gregs, but we need to bzero() everything -after- that
	 * to avoid having any kernel stack garbage escape to userland.
	 *
	 * If we have been asked to save extended state, then we must make sure
	 * that we don't clobber that value. We must also determine if the
	 * processor has xsave state. If it does not, then we simply honor the
	 * pointer, but do not write anything out and do not set the flag.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		/*
		 * The only other flag that we have right now modifies the
		 * copyout behavior when we're copying out extended
		 * information. If we're not saving extended state, no flags
		 * should be set at all.
		 */
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext_t) -
	    offsetof(ucontext_t, uc_mcontext.fpregs));
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;

	/*
	 * Try to copyin() the ustack if one is registered. If the stack
	 * has zero size, this indicates that stack bounds checking has
	 * been disabled for this LWP. If stack bounds checking is disabled
	 * or the copyin() fails, we fall back to the legacy behavior.
	 */
	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			ucp->uc_stack = lwp->lwp_sigaltstack;
		} else {
			ucp->uc_stack.ss_sp = p->p_usrstack - p->p_stksize;
			ucp->uc_stack.ss_size = p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set,
	 * arrange for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that the saved user context won't have the
		 * trace flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * trap() always checks DEBUG_PENDING before
			 * checking for any pending signal. This at times
			 * can potentially lead to DEBUG_PENDING not being
			 * honored (e.g. the lwp is stopped by
			 * stop_on_fault() called from trap(); after being
			 * awakened it might see a pending signal and call
			 * savecontext(), but on the way back to userland
			 * there is no place the flag can be detected).
			 * Hence, in anticipation of such occasions, set the
			 * AST flag for the thread, which will make the
			 * thread take an excursion through trap() where the
			 * flag will be handled appropriately.
			 */
			aston(curthread);
		}
	}

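	/*
	 * Capture the general-purpose register state, and the floating point
	 * state if the FPU is enabled for this lwp; otherwise, clear UC_FPU
	 * to note that the context carries no FPU state.
	 */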
	getgregs(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

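	/* Convert the kernel sigset_t into its user-visible counterpart. */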
	sigktou(mask, &ucp->uc_sigmask);

	/*
	 * Determine if we need to get the rest of the xsave context out here.
	 * If the thread doesn't have the FPU enabled, then we don't actually
	 * need to do this. We also don't have to if it wasn't requested.
	 */
	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * While you might be asking why and contemplating despair, just know
	 * that some things need to be done in the face of signals (half the
	 * reason this function exists). Basically, when in signal context we
	 * can't trigger watchpoints. This means we need to tell the FPU copy
	 * logic to actually use on_fault()/no_fault() and the non-error form
	 * of copyout() (which still checks that the destination is a user
	 * address, at least).
	 */
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave,
		    savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, ucp->uc_xsave, copyout);
	}

	return (ret);
}

/*
 * Restore user context.
 */
void
restorecontext(ucontext_t *ucp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);

	lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;

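	/*
	 * Restore the alternate signal stack state: either adopt the saved
	 * stack as the current sigaltstack, or record that we are no longer
	 * running on the alternate stack.
	 */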
	if (ucp->uc_flags & UC_STACK) {
		if (ucp->uc_stack.ss_flags == SS_ONSTACK)
			lwp->lwp_sigaltstack = ucp->uc_stack;
		else
			lwp->lwp_sigaltstack.ss_flags &= ~SS_ONSTACK;
	}

	if (ucp->uc_flags & UC_CPU) {
		/*
		 * If the trace flag is set, mark the lwp to take a
		 * single-step trap on return to user level (below).
		 * The x86 lcall and sysenter interfaces have already done
		 * this, and turned off the flag, but the amd64 syscall
		 * interface has not.
		 */
		if (lwptoregs(lwp)->r_ps & PS_T)
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
		setgregs(lwp, ucp->uc_mcontext.gregs);
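		/*
		 * Exit the syscall via the JUSTRETURN path so that the normal
		 * syscall return logic does not overwrite the register state
		 * we just restored.
		 */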
		lwp->lwp_eosys = JUSTRETURN;
		t->t_post_sys = 1;
		aston(curthread);
	}

	/*
	 * The logic to copy in the ucontext_t takes care of combining UC_FPU
	 * and UC_XSAVE, so at this point only one of them should be set, if
	 * any.
	 */
	if (ucp->uc_flags & UC_XSAVE) {
		ASSERT0(ucp->uc_flags & UC_FPU);
		ASSERT3U((uintptr_t)ucp->uc_xsave, >=, _kernelbase);
		fpu_set_xsave(lwp, (const void *)ucp->uc_xsave);
	} else if (ucp->uc_flags & UC_FPU) {
		setfpregs(lwp, &ucp->uc_mcontext.fpregs);
	}

	if (ucp->uc_flags & UC_SIGMASK) {
		/*
		 * We don't need to acquire p->p_lock here;
		 * we are manipulating thread-private data.
		 */
		schedctl_finish_sigblock(t);
		sigutok(&ucp->uc_sigmask, &t->t_hold);
		if (sigcheck(ttoproc(t), t))
			t->t_sig_check = 1;
	}
}

int
getsetcontext(int flag, void *arg)
{
	ucontext_t uc;
	ucontext_t *ucp;
	klwp_t *lwp = ttolwp(curthread);
	void *fpu = NULL;
	stack_t dummy_stk;
	int ret;

	/*
	 * In future releases, when the ucontext structure grows,
	 * getcontext should be modified to only return the fields
	 * specified in the uc_flags. That way, the structure can grow
	 * and still be binary compatible with all .o's which will only
	 * have old fields defined in uc_flags.
	 */

	switch (flag) {
	default:
		return (set_errno(EINVAL));

	case GETCONTEXT:
		schedctl_finish_sigblock(curthread);
		ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	/*
	 * In the case of GETCONTEXT_EXTD, we've theoretically been given all
	 * the required pointers of the appropriate length by libc in the
	 * ucontext_t. We must first copyin the offsets that we care about to
	 * seed the known extensions. Right now that is just the uc_xsave
	 * member. As we are setting uc_flags, we only look at the members we
	 * need to care about.
	 *
	 * The main reason that we have a different entry point is that we
	 * don't want to assume that callers have always properly zeroed their
	 * ucontext_t ahead of calling into libc. In fact, it is often just
	 * declared on the stack, so we can't assume that at all. Instead,
	 * getcontext_extd() does require that.
	 */
	case GETCONTEXT_EXTD:
		schedctl_finish_sigblock(curthread);
		ucp = arg;
		if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}
		ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	case SETCONTEXT:
		ucp = arg;
		if (ucp == NULL)
			exit(CLD_EXITED, 0);
		/*
		 * Don't copyin filler or floating state unless we need it.
		 * The ucontext_t struct and fields are specified in the ABI.
		 */
		if (copyin(ucp, &uc, offsetof(ucontext_t, uc_filler) -
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);

		if ((uc.uc_flags & UC_FPU) &&
		    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}

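		/*
		 * The initial copyin above stopped short of uc_xsave, so
		 * clear it here before optionally copying in the caller's
		 * value.
		 */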
		uc.uc_xsave = 0;
		if ((uc.uc_flags & UC_XSAVE) != 0) {
			if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
			    sizeof (uc.uc_xsave)) != 0) {
				return (set_errno(EFAULT));
			}

			ret = fpu_signal_copyin(lwp, &uc);
			if (ret != 0) {
				return (set_errno(ret));
			}
		}

		restorecontext(&uc);

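		/*
		 * Publish the restored stack bounds back to the registered
		 * ustack location, if one exists.
		 */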
		if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
			(void) copyout(&uc.uc_stack, (stack_t *)lwp->lwp_ustack,
			    sizeof (uc.uc_stack));
		return (0);

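	/*
	 * GETUSTACK returns the user address of the stack-boundary structure
	 * currently registered for this lwp, if any.
	 */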
	case GETUSTACK:
		if (copyout(&lwp->lwp_ustack, arg, sizeof (caddr_t)))
			return (set_errno(EFAULT));
		return (0);

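	/*
	 * SETUSTACK registers a new stack-boundary structure. The copyin()
	 * into dummy_stk merely validates that the address is readable; the
	 * contents are not used here.
	 */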
	case SETUSTACK:
		if (copyin(arg, &dummy_stk, sizeof (dummy_stk)))
			return (set_errno(EFAULT));
		lwp->lwp_ustack = (uintptr_t)arg;
		return (0);
	}
}

#ifdef _SYSCALL32_IMPL

/*
 * Save user context for 32-bit processes.
 */
int
savecontext32(ucontext32_t *ucp, const k_sigset_t *mask,
    savecontext_flags_t flags)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct regs *rp = lwptoregs(lwp);
	boolean_t need_xsave = B_FALSE;
	boolean_t fpu_en;
	int32_t user_xsave = 0;
	uintptr_t uaddr;
	int ret;

	VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

	/*
	 * See savecontext() for an explanation of this.
	 */
	if ((flags & SAVECTXT_F_EXTD) != 0) {
		user_xsave = ucp->uc_xsave;
		if (fpu_xsave_enabled() && user_xsave != 0) {
			need_xsave = B_TRUE;
		}
	} else {
		VERIFY0(flags);
	}
	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext32_t) -
	    offsetof(ucontext32_t, uc_mcontext.fpregs));
	ucp->uc_xsave = user_xsave;

	ucp->uc_flags = UC_ALL;
	ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;

	if (lwp->lwp_ustack == (uintptr_t)NULL ||
	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
	    sizeof (ucp->uc_stack)) != 0 ||
	    ucp->uc_stack.ss_size == 0) {

		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
			ucp->uc_stack.ss_sp =
			    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
			ucp->uc_stack.ss_size =
			    (size32_t)lwp->lwp_sigaltstack.ss_size;
			ucp->uc_stack.ss_flags = SS_ONSTACK;
		} else {
			ucp->uc_stack.ss_sp = (caddr32_t)(uintptr_t)
			    (p->p_usrstack - p->p_stksize);
			ucp->uc_stack.ss_size = (size32_t)p->p_stksize;
			ucp->uc_stack.ss_flags = 0;
		}
	}

	/*
	 * If either the trace flag or REQUEST_STEP is set, arrange
	 * for single-stepping and turn off the trace flag.
	 */
	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
		/*
		 * Clear PS_T so that the saved user context won't have the
		 * trace flag set.
		 */
		rp->r_ps &= ~PS_T;

		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
			/*
			 * See comments in savecontext().
			 */
			aston(curthread);
		}
	}

	getgregs32(lwp, ucp->uc_mcontext.gregs);
	fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
	if (fpu_en)
		getfpregs32(lwp, &ucp->uc_mcontext.fpregs);
	else
		ucp->uc_flags &= ~UC_FPU;

	sigktou(mask, &ucp->uc_sigmask);

	if (!need_xsave || !fpu_en) {
		return (0);
	}

	ucp->uc_flags |= UC_XSAVE;

	/*
	 * To avoid changing or breaking programs, the filler in the
	 * ucontext_t was always declared as a long, which is signed; in this
	 * 32-bit version that means an int32_t. We cannot cast directly to a
	 * uintptr_t, as that might sign-extend, so we first have to go
	 * through a uint32_t and only then to a uintptr_t. Otherwise, see
	 * savecontext().
	 */
	uaddr = (uintptr_t)(uint32_t)ucp->uc_xsave;
	if ((flags & SAVECTXT_F_ONFAULT) != 0) {
		ret = fpu_signal_copyout(lwp, uaddr, savecontext_copyout);
	} else {
		ret = fpu_signal_copyout(lwp, uaddr, copyout);
	}

	return (ret);
}

int
getsetcontext32(int flag, void *arg)
{
	ucontext32_t uc;
	ucontext_t ucnat;
	ucontext32_t *ucp;
	klwp_t *lwp = ttolwp(curthread);
	caddr32_t ustack32;
	stack32_t dummy_stk32;
	int ret;

	switch (flag) {
	default:
		return (set_errno(EINVAL));

	case GETCONTEXT:
		schedctl_finish_sigblock(curthread);
		ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	/*
	 * See getsetcontext() for an explanation of what is going on here.
	 */
	case GETCONTEXT_EXTD:
		schedctl_finish_sigblock(curthread);
		ucp = arg;
		if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}
		ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
		if (ret != 0)
			return (set_errno(ret));
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
		if (copyout(&uc, arg, sizeof (uc)))
			return (set_errno(EFAULT));
		return (0);

	case SETCONTEXT:
		ucp = arg;
		if (ucp == NULL)
			exit(CLD_EXITED, 0);
		if (copyin(ucp, &uc, offsetof(ucontext32_t, uc_filler) -
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}
		if (uc.uc_flags & UC_SIGMASK)
			SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
		if ((uc.uc_flags & UC_FPU) &&
		    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
		    sizeof (uc.uc_mcontext.fpregs))) {
			return (set_errno(EFAULT));
		}

		uc.uc_xsave = 0;
		if ((uc.uc_flags & UC_XSAVE) != 0 &&
		    copyin(&ucp->uc_xsave, &uc.uc_xsave,
		    sizeof (uc.uc_xsave)) != 0) {
			return (set_errno(EFAULT));
		}

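		/*
		 * Convert the 32-bit context into its native form before
		 * consuming the extended FPU state and restoring it.
		 */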
		ucontext_32ton(&uc, &ucnat);

		if ((ucnat.uc_flags & UC_XSAVE) != 0) {
			ret = fpu_signal_copyin(lwp, &ucnat);
			if (ret != 0) {
				return (set_errno(ret));
			}
		}

		restorecontext(&ucnat);

		if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
			(void) copyout(&uc.uc_stack,
			    (stack32_t *)lwp->lwp_ustack, sizeof (uc.uc_stack));
		return (0);

	case GETUSTACK:
		ustack32 = (caddr32_t)lwp->lwp_ustack;
		if (copyout(&ustack32, arg, sizeof (ustack32)))
			return (set_errno(EFAULT));
		return (0);

	case SETUSTACK:
		if (copyin(arg, &dummy_stk32, sizeof (dummy_stk32)))
			return (set_errno(EFAULT));
		lwp->lwp_ustack = (uintptr_t)arg;
		return (0);
	}
}

#endif	/* _SYSCALL32_IMPL */