7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30 #include <sys/errno.h>
31 #include <sys/systm.h>
32 #include <sys/archsystm.h>
33 #include <sys/privregs.h>
34 #include <sys/exec.h>
35 #include <sys/lwp.h>
36 #include <sys/sem.h>
37 #include <sys/brand.h>
38 #include <sys/lx_brand.h>
39 #include <sys/lx_misc.h>
40 #include <sys/lx_siginfo.h>
41 #include <sys/lx_futex.h>
42 #include <lx_errno.h>
43 #include <sys/lx_userhz.h>
44 #include <sys/cmn_err.h>
45 #include <sys/siginfo.h>
46 #include <sys/contract/process_impl.h>
47 #include <sys/x86_archext.h>
48 #include <sys/sdt.h>
49 #include <lx_signum.h>
50 #include <lx_syscall.h>
51 #include <sys/proc.h>
52 #include <sys/procfs.h>
53 #include <net/if.h>
54 #include <inet/ip6.h>
55 #include <sys/sunddi.h>
56 #include <sys/dlpi.h>
57 #include <sys/sysmacros.h>
58
59 /* Linux specific functions and definitions */
60 static void lx_save(klwp_t *);
61 static void lx_restore(klwp_t *);
62
63 /*
64 * Set the return code for the forked child, always zero
65 */
66 /*ARGSUSED*/
67 void
68 lx_setrval(klwp_t *lwp, int v1, int v2)
69 {
70 lwptoregs(lwp)->r_r0 = 0;
71 }
72
73 /*
74 * Reset process state on exec(2)
75 */
76 void
77 lx_exec()
78 {
79 klwp_t *lwp = ttolwp(curthread);
80 struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
81 proc_t *p = ttoproc(curthread);
82 lx_proc_data_t *pd = ptolxproc(p);
103 * Inform ptrace(2) that we are processing an execve(2) call so that if
104 * we are traced we can post either the PTRACE_EVENT_EXEC event or the
105 * legacy SIGTRAP.
106 */
107 (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0, 0);
108
109 /* clear the fs/gsbase values until the app. can reinitialize them */
110 lwpd->br_lx_fsbase = (uintptr_t)NULL;
111 lwpd->br_ntv_fsbase = (uintptr_t)NULL;
112 lwpd->br_lx_gsbase = (uintptr_t)NULL;
113 lwpd->br_ntv_gsbase = (uintptr_t)NULL;
114
115 /*
116 * Clear the native stack flags. This will be reinitialised by
117 * lx_init() in the new process image.
118 */
119 lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
120 lwpd->br_ntv_stack = 0;
121 lwpd->br_ntv_stack_current = 0;
122
123 installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
124 NULL, NULL);
125
126 /*
127 * clear out the tls array
128 */
129 bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
130
131 /*
132 * reset the tls entries in the gdt
133 */
134 kpreempt_disable();
135 lx_restore(lwp);
136 kpreempt_enable();
137
138 /*
139 * The exec syscall doesn't return (so we don't call lx_syscall_return)
140 * but for our ptrace emulation we need to do this so that a tracer
141 * does not get out of sync. We know that by the time this lx_exec
142 * function is called that the exec has succeeded.
143 */
144 rp->r_r0 = 0;
329 } else {
330 mutex_exit(&lxzdata->lxzd_lock);
331 }
332
333 /*
334 * It is possible for the lx_freelwp hook to be called without a prior
335 * call to lx_exitlwp being made. This happens as part of lwp
336 * de-branding when a native binary is executed from a branded process.
337 *
338 * To cover all cases, lx_cleanlwp is called from lx_exitlwp as well
339 * as here in lx_freelwp. When the second call is redundant, the
340 * resources will already be freed and no work will be needed.
341 */
342 lx_cleanlwp(lwp, p);
343
344 /*
345 * Remove our system call interposer.
346 */
347 lwp->lwp_brand_syscall = NULL;
348
349 (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
350 lx_save, NULL);
351 if (lwpd->br_pid != 0) {
352 lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
353 }
354
355 /*
356 * Discard the affinity mask.
357 */
358 VERIFY(lwpd->br_affinitymask != NULL);
359 cpuset_free(lwpd->br_affinitymask);
360 lwpd->br_affinitymask = NULL;
361
362 /*
363 * Ensure that lx_ptrace_exit() has been called to detach
364 * ptrace(2) tracers and tracees.
365 */
366 VERIFY(lwpd->br_ptrace_tracer == NULL);
367 VERIFY(lwpd->br_ptrace_accord == NULL);
368
369 lwp->lwp_brand = NULL;
370 kmem_free(lwpd, sizeof (struct lx_lwp_data));
481 * held at that point, the only time we can guarantee a new pid isn't
482 * needed is when p_lwpcnt == 0. This is because other lwps won't be
483 * present to race with us with regards to pid allocation.
484 *
485 * This means that in all other cases (where p_lwpcnt > 0), we expect
486 * that lx_lwpdata_alloc will allocate a pid for us to use here, even
487 * if it is unneeded. If this process is undergoing an exec, for
488 * example, the single existing lwp will not need a new pid when it is
489 * rebranded. In that case, lx_pid_assign will free the unneeded pid.
490 */
491 VERIFY(lwpd->br_lpid->lxp_pidp != NULL || p->p_lwpcnt == 0);
492
493 lx_pid_assign(tp, lwpd->br_lpid);
494 lwpd->br_tgid = lwpd->br_pid;
495 /*
496 * Having performed the lx pid assignment, the lpid reference is no
497 * longer needed. The underlying data will be freed during lx_freelwp.
498 */
499 lwpd->br_lpid = NULL;
500
501 installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
502 lx_save, NULL, NULL);
503
504 /*
505 * Install branded system call hooks for this LWP:
506 */
507 lwp->lwp_brand_syscall = lx_syscall_enter;
508
509 /*
510 * The new LWP inherits the parent LWP cgroup ID.
511 */
512 if (plwpd != NULL) {
513 lwpd->br_cgroupid = plwpd->br_cgroupid;
514 }
515 /*
516 * The new LWP inherits the parent LWP emulated scheduling info.
517 */
518 if (plwpd != NULL) {
519 lwpd->br_schd_class = plwpd->br_schd_class;
520 lwpd->br_schd_pri = plwpd->br_schd_pri;
521 lwpd->br_schd_flags = plwpd->br_schd_flags;
522 lwpd->br_schd_runtime = plwpd->br_schd_runtime;
596 lx_affinity_forklwp(srclwp, dstlwp);
597
598 /*
599 * Flag so child doesn't ptrace-stop on syscall exit.
600 */
601 dst->br_ptrace_flags |= LX_PTF_NOSTOP;
602
603 if (src->br_clone_grp_flags != 0) {
604 lx_clone_grp_enter(src->br_clone_grp_flags, lwptoproc(srclwp),
605 lwptoproc(dstlwp));
606 /* clone group no longer pending on this thread */
607 src->br_clone_grp_flags = 0;
608 }
609 }
610
611 /*
612 * When switching a Linux process off the CPU, clear its GDT entries.
613 */
614 /* ARGSUSED */
615 static void
616 lx_save(klwp_t *t)
617 {
618 int i;
619
620 #if defined(__amd64)
621 reset_sregs();
622 #endif
623 for (i = 0; i < LX_TLSNUM; i++)
624 gdt_update_usegd(GDT_TLSMIN + i, &null_udesc);
625 }
626
627 /*
628 * When switching a Linux process on the CPU, set its GDT entries.
629 *
630 * For 64-bit code we don't have to worry about explicitly setting the
631 * %fsbase via wrmsr(MSR_AMD_FSBASE) here. Instead, that should happen
632 * automatically in update_sregs if we are executing in user-land. If this
633 * is the case then pcb_rupdate should be set.
634 */
635 static void
636 lx_restore(klwp_t *t)
637 {
638 struct lx_lwp_data *lwpd = lwptolxlwp(t);
639 user_desc_t *tls;
640 int i;
641
642 ASSERT(lwpd);
643
644 tls = lwpd->br_tls;
645 for (i = 0; i < LX_TLSNUM; i++)
646 gdt_update_usegd(GDT_TLSMIN + i, &tls[i]);
647 }
648
649 void
650 lx_set_gdt(int entry, user_desc_t *descrp)
651 {
652
653 gdt_update_usegd(entry, descrp);
654 }
655
656 void
657 lx_clear_gdt(int entry)
|
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2022 Joyent, Inc.
28 */
29
30 #include <sys/errno.h>
31 #include <sys/systm.h>
32 #include <sys/archsystm.h>
33 #include <sys/privregs.h>
34 #include <sys/exec.h>
35 #include <sys/lwp.h>
36 #include <sys/sem.h>
37 #include <sys/brand.h>
38 #include <sys/lx_brand.h>
39 #include <sys/lx_misc.h>
40 #include <sys/lx_siginfo.h>
41 #include <sys/lx_futex.h>
42 #include <lx_errno.h>
43 #include <sys/lx_userhz.h>
44 #include <sys/cmn_err.h>
45 #include <sys/siginfo.h>
46 #include <sys/contract/process_impl.h>
47 #include <sys/x86_archext.h>
48 #include <sys/sdt.h>
49 #include <lx_signum.h>
50 #include <lx_syscall.h>
51 #include <sys/proc.h>
52 #include <sys/procfs.h>
53 #include <net/if.h>
54 #include <inet/ip6.h>
55 #include <sys/sunddi.h>
56 #include <sys/dlpi.h>
57 #include <sys/sysmacros.h>
58
59 /* Linux specific functions and definitions */
60 static void lx_save(void *);
61 static void lx_restore(void *);
62
63 /* Context op template. */
64 static const struct ctxop_template lx_ctxop_template = {
65 .ct_rev = CTXOP_TPL_REV,
66 .ct_save = lx_save,
67 .ct_restore = lx_restore,
68 .ct_exit = lx_save,
69 };
70
71 /*
72 * Set the return code for the forked child, always zero
73 */
74 /*ARGSUSED*/
75 void
76 lx_setrval(klwp_t *lwp, int v1, int v2)
77 {
78 lwptoregs(lwp)->r_r0 = 0;
79 }
80
81 /*
82 * Reset process state on exec(2)
83 */
84 void
85 lx_exec()
86 {
87 klwp_t *lwp = ttolwp(curthread);
88 struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
89 proc_t *p = ttoproc(curthread);
90 lx_proc_data_t *pd = ptolxproc(p);
111 * Inform ptrace(2) that we are processing an execve(2) call so that if
112 * we are traced we can post either the PTRACE_EVENT_EXEC event or the
113 * legacy SIGTRAP.
114 */
115 (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0, 0);
116
117 /* clear the fs/gsbase values until the app. can reinitialize them */
118 lwpd->br_lx_fsbase = (uintptr_t)NULL;
119 lwpd->br_ntv_fsbase = (uintptr_t)NULL;
120 lwpd->br_lx_gsbase = (uintptr_t)NULL;
121 lwpd->br_ntv_gsbase = (uintptr_t)NULL;
122
123 /*
124 * Clear the native stack flags. This will be reinitialised by
125 * lx_init() in the new process image.
126 */
127 lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
128 lwpd->br_ntv_stack = 0;
129 lwpd->br_ntv_stack_current = 0;
130
131 ctxop_install(lwptot(lwp), &lx_ctxop_template, lwp);
132
133 /*
134 * clear out the tls array
135 */
136 bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
137
138 /*
139 * reset the tls entries in the gdt
140 */
141 kpreempt_disable();
142 lx_restore(lwp);
143 kpreempt_enable();
144
145 /*
146 * The exec syscall doesn't return (so we don't call lx_syscall_return)
147 * but for our ptrace emulation we need to do this so that a tracer
148 * does not get out of sync. We know that by the time this lx_exec
149 * function is called that the exec has succeeded.
150 */
151 rp->r_r0 = 0;
336 } else {
337 mutex_exit(&lxzdata->lxzd_lock);
338 }
339
340 /*
341 * It is possible for the lx_freelwp hook to be called without a prior
342 * call to lx_exitlwp being made. This happens as part of lwp
343 * de-branding when a native binary is executed from a branded process.
344 *
345 * To cover all cases, lx_cleanlwp is called from lx_exitlwp as well
346 * as here in lx_freelwp. When the second call is redundant, the
347 * resources will already be freed and no work will be needed.
348 */
349 lx_cleanlwp(lwp, p);
350
351 /*
352 * Remove our system call interposer.
353 */
354 lwp->lwp_brand_syscall = NULL;
355
356 /*
357 * If this process is being de-branded during an exec(),
358 * the LX ctxops may have already been removed, so the result
359 * from ctxop_remove is irrelevant.
360 */
361 (void) ctxop_remove(lwptot(lwp), &lx_ctxop_template, lwp);
362 if (lwpd->br_pid != 0) {
363 lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
364 }
365
366 /*
367 * Discard the affinity mask.
368 */
369 VERIFY(lwpd->br_affinitymask != NULL);
370 cpuset_free(lwpd->br_affinitymask);
371 lwpd->br_affinitymask = NULL;
372
373 /*
374 * Ensure that lx_ptrace_exit() has been called to detach
375 * ptrace(2) tracers and tracees.
376 */
377 VERIFY(lwpd->br_ptrace_tracer == NULL);
378 VERIFY(lwpd->br_ptrace_accord == NULL);
379
380 lwp->lwp_brand = NULL;
381 kmem_free(lwpd, sizeof (struct lx_lwp_data));
492 * held at that point, the only time we can guarantee a new pid isn't
493 * needed is when p_lwpcnt == 0. This is because other lwps won't be
494 * present to race with us with regards to pid allocation.
495 *
496 * This means that in all other cases (where p_lwpcnt > 0), we expect
497 * that lx_lwpdata_alloc will allocate a pid for us to use here, even
498 * if it is unneeded. If this process is undergoing an exec, for
499 * example, the single existing lwp will not need a new pid when it is
500 * rebranded. In that case, lx_pid_assign will free the unneeded pid.
501 */
502 VERIFY(lwpd->br_lpid->lxp_pidp != NULL || p->p_lwpcnt == 0);
503
504 lx_pid_assign(tp, lwpd->br_lpid);
505 lwpd->br_tgid = lwpd->br_pid;
506 /*
507 * Having performed the lx pid assignment, the lpid reference is no
508 * longer needed. The underlying data will be freed during lx_freelwp.
509 */
510 lwpd->br_lpid = NULL;
511
512 ctxop_install(lwptot(lwp), &lx_ctxop_template, lwp);
513
514 /*
515 * Install branded system call hooks for this LWP:
516 */
517 lwp->lwp_brand_syscall = lx_syscall_enter;
518
519 /*
520 * The new LWP inherits the parent LWP cgroup ID.
521 */
522 if (plwpd != NULL) {
523 lwpd->br_cgroupid = plwpd->br_cgroupid;
524 }
525 /*
526 * The new LWP inherits the parent LWP emulated scheduling info.
527 */
528 if (plwpd != NULL) {
529 lwpd->br_schd_class = plwpd->br_schd_class;
530 lwpd->br_schd_pri = plwpd->br_schd_pri;
531 lwpd->br_schd_flags = plwpd->br_schd_flags;
532 lwpd->br_schd_runtime = plwpd->br_schd_runtime;
606 lx_affinity_forklwp(srclwp, dstlwp);
607
608 /*
609 * Flag so child doesn't ptrace-stop on syscall exit.
610 */
611 dst->br_ptrace_flags |= LX_PTF_NOSTOP;
612
613 if (src->br_clone_grp_flags != 0) {
614 lx_clone_grp_enter(src->br_clone_grp_flags, lwptoproc(srclwp),
615 lwptoproc(dstlwp));
616 /* clone group no longer pending on this thread */
617 src->br_clone_grp_flags = 0;
618 }
619 }
620
621 /*
622 * When switching a Linux process off the CPU, clear its GDT entries.
623 */
624 /* ARGSUSED */
625 static void
626 lx_save(void *arg)
627 {
628 klwp_t *t = (klwp_t *)arg;
629 int i;
630
631 #if defined(__amd64)
632 reset_sregs();
633 #endif
634 for (i = 0; i < LX_TLSNUM; i++)
635 gdt_update_usegd(GDT_TLSMIN + i, &null_udesc);
636 }
637
638 /*
639 * When switching a Linux process on the CPU, set its GDT entries.
640 *
641 * For 64-bit code we don't have to worry about explicitly setting the
642 * %fsbase via wrmsr(MSR_AMD_FSBASE) here. Instead, that should happen
643 * automatically in update_sregs if we are executing in user-land. If this
644 * is the case then pcb_rupdate should be set.
645 */
646 static void
647 lx_restore(void *arg)
648 {
649 klwp_t *t = (klwp_t *)arg;
650 struct lx_lwp_data *lwpd = lwptolxlwp(t);
651 user_desc_t *tls;
652 int i;
653
654 ASSERT(lwpd);
655
656 tls = lwpd->br_tls;
657 for (i = 0; i < LX_TLSNUM; i++)
658 gdt_update_usegd(GDT_TLSMIN + i, &tls[i]);
659 }
660
661 void
662 lx_set_gdt(int entry, user_desc_t *descrp)
663 {
664
665 gdt_update_usegd(entry, descrp);
666 }
667
668 void
669 lx_clear_gdt(int entry)
|