Print this page
OS-8347 Update SmartOS to support illumos 13917


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 #include <sys/errno.h>
  31 #include <sys/systm.h>
  32 #include <sys/archsystm.h>
  33 #include <sys/privregs.h>
  34 #include <sys/exec.h>
  35 #include <sys/lwp.h>
  36 #include <sys/sem.h>
  37 #include <sys/brand.h>
  38 #include <sys/lx_brand.h>
  39 #include <sys/lx_misc.h>
  40 #include <sys/lx_siginfo.h>
  41 #include <sys/lx_futex.h>
  42 #include <lx_errno.h>
  43 #include <sys/lx_userhz.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/siginfo.h>
  46 #include <sys/contract/process_impl.h>
  47 #include <sys/x86_archext.h>
  48 #include <sys/sdt.h>
  49 #include <lx_signum.h>
  50 #include <lx_syscall.h>
  51 #include <sys/proc.h>
  52 #include <sys/procfs.h>
  53 #include <net/if.h>
  54 #include <inet/ip6.h>
  55 #include <sys/sunddi.h>
  56 #include <sys/dlpi.h>
  57 #include <sys/sysmacros.h>
  58 
  59 /* Context handlers passed to installctx()/removectx(); defined below */
  60 static void lx_save(klwp_t *t);
  61 static void lx_restore(klwp_t *t);
  62 








  63 /* Zero the return register for a forked child; v1 and v2 are unused. */
  64 /*ARGSUSED*/
  65 void
  66 lx_setrval(klwp_t *lwp, int v1, int v2)
  67 {
  68         struct regs *rp = lwptoregs(lwp);
  69 
  70         rp->r_r0 = 0;
  71 }
  72 
  73 /*
  74  * Reset process state on exec(2)
  75  */
  76 void
  77 lx_exec()
  78 {
  79         klwp_t *lwp = ttolwp(curthread);
  80         struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
  81         proc_t *p = ttoproc(curthread);
  82         lx_proc_data_t *pd = ptolxproc(p);


 103          * Inform ptrace(2) that we are processing an execve(2) call so that if
 104          * we are traced we can post either the PTRACE_EVENT_EXEC event or the
 105          * legacy SIGTRAP.
 106          */
 107         (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0, 0);
 108 
 109         /* clear the fs/gsbase values until the app. can reinitialize them */
 110         lwpd->br_lx_fsbase = (uintptr_t)NULL;
 111         lwpd->br_ntv_fsbase = (uintptr_t)NULL;
 112         lwpd->br_lx_gsbase = (uintptr_t)NULL;
 113         lwpd->br_ntv_gsbase = (uintptr_t)NULL;
 114 
 115         /*
 116          * Clear the native stack flags.  This will be reinitialised by
 117          * lx_init() in the new process image.
 118          */
 119         lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
 120         lwpd->br_ntv_stack = 0;
 121         lwpd->br_ntv_stack_current = 0;
 122 
 123         installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
 124             NULL, NULL);
 125 
 126         /*
 127          * clear out the tls array
 128          */
 129         bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
 130 
 131         /*
 132          * reset the tls entries in the gdt
 133          */
 134         kpreempt_disable();
 135         lx_restore(lwp);
 136         kpreempt_enable();
 137 
 138         /*
 139          * The exec syscall doesn't return (so we don't call lx_syscall_return)
 140          * but for our ptrace emulation we need to do this so that a tracer
 141          * does not get out of sync. We know that by the time this lx_exec
 142          * function is called that the exec has succeeded.
 143          */
 144         rp->r_r0 = 0;


 329         } else {
 330                 mutex_exit(&lxzdata->lxzd_lock);
 331         }
 332 
 333         /*
 334          * It is possible for the lx_freelwp hook to be called without a prior
 335          * call to lx_exitlwp being made.  This happens as part of lwp
 336          * de-branding when a native binary is executed from a branded process.
 337          *
 338          * To cover all cases, lx_cleanlwp is called from lx_exitlwp as well
 339          * here in lx_freelwp.  When the second call is redundant, the
 340          * resources will already be freed and no work will be needed.
 341          */
 342         lx_cleanlwp(lwp, p);
 343 
 344         /*
 345          * Remove our system call interposer.
 346          */
 347         lwp->lwp_brand_syscall = NULL;
 348 
 349         (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
 350             lx_save, NULL);




 351         if (lwpd->br_pid != 0) {
 352                 lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
 353         }
 354 
 355         /*
 356          * Discard the affinity mask.
 357          */
 358         VERIFY(lwpd->br_affinitymask != NULL);
 359         cpuset_free(lwpd->br_affinitymask);
 360         lwpd->br_affinitymask = NULL;
 361 
 362         /*
 363          * Ensure that lx_ptrace_exit() has been called to detach
 364          * ptrace(2) tracers and tracees.
 365          */
 366         VERIFY(lwpd->br_ptrace_tracer == NULL);
 367         VERIFY(lwpd->br_ptrace_accord == NULL);
 368 
 369         lwp->lwp_brand = NULL;
 370         kmem_free(lwpd, sizeof (struct lx_lwp_data));


 481          * held at that point, the only time we can guarantee a new pid isn't
 482          * needed is when p_lwpcnt == 0.  This is because other lwps won't be
 483          * present to race with us with regards to pid allocation.
 484          *
 485          * This means that in all other cases (where p_lwpcnt > 0), we expect
 486          * that lx_lwpdata_alloc will allocate a pid for us to use here, even
 487          * if it is unneeded.  If this process is undergoing an exec, for
 488          * example, the single existing lwp will not need a new pid when it is
 489          * rebranded.  In that case, lx_pid_assign will free the unneeded pid.
 490          */
 491         VERIFY(lwpd->br_lpid->lxp_pidp != NULL || p->p_lwpcnt == 0);
 492 
 493         lx_pid_assign(tp, lwpd->br_lpid);
 494         lwpd->br_tgid = lwpd->br_pid;
 495         /*
 496          * Having performed the lx pid assignment, the lpid reference is no
 497          * longer needed.  The underlying data will be freed during lx_freelwp.
 498          */
 499         lwpd->br_lpid = NULL;
 500 
 501         installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
 502             lx_save, NULL, NULL);
 503 
 504         /*
 505          * Install branded system call hooks for this LWP:
 506          */
 507         lwp->lwp_brand_syscall = lx_syscall_enter;
 508 
 509         /*
 510          * The new LWP inherits the parent LWP cgroup ID.
 511          */
 512         if (plwpd != NULL) {
 513                 lwpd->br_cgroupid = plwpd->br_cgroupid;
 514         }
 515         /*
 516          * The new LWP inherits the parent LWP emulated scheduling info.
 517          */
 518         if (plwpd != NULL) {
 519                 lwpd->br_schd_class = plwpd->br_schd_class;
 520                 lwpd->br_schd_pri = plwpd->br_schd_pri;
 521                 lwpd->br_schd_flags = plwpd->br_schd_flags;
 522                 lwpd->br_schd_runtime = plwpd->br_schd_runtime;


 596         lx_affinity_forklwp(srclwp, dstlwp);
 597 
 598         /*
 599          * Flag so child doesn't ptrace-stop on syscall exit.
 600          */
 601         dst->br_ptrace_flags |= LX_PTF_NOSTOP;
 602 
 603         if (src->br_clone_grp_flags != 0) {
 604                 lx_clone_grp_enter(src->br_clone_grp_flags, lwptoproc(srclwp),
 605                     lwptoproc(dstlwp));
 606                 /* clone group no longer pending on this thread */
 607                 src->br_clone_grp_flags = 0;
 608         }
 609 }
 610 
 611 /*
 612  * When a Linux lwp is taken off the CPU, null out its TLS slots in the GDT.
 613  */
 614 /* ARGSUSED */
 615 static void
 616 lx_save(klwp_t *t)
 617 {

 618         int slot;
 619 
 620 #if defined(__amd64)
 621         reset_sregs();
 622 #endif
 623         for (slot = GDT_TLSMIN; slot < GDT_TLSMIN + LX_TLSNUM; slot++)
 624                 gdt_update_usegd(slot, &null_udesc);
 625 }
 626 
 627 /*
 628  * Runs when a Linux lwp is switched onto the CPU: load each of its saved
 629  * TLS descriptors (br_tls) into the corresponding GDT slot.
 630  *
 631  * Nothing is done here for the 64-bit %fsbase; no explicit
 632  * wrmsr(MSR_AMD_FSBASE) is needed because update_sregs should handle it
 633  * when we are executing in user-land, in which case pcb_rupdate should be set.
 634  */
 635 static void
 636 lx_restore(klwp_t *t)
 637 {

 638         struct lx_lwp_data *lwpd = lwptolxlwp(t);
 639         int n = 0;
 640 
 641         ASSERT(lwpd);
 642 
 643         while (n < LX_TLSNUM) {
 644                 gdt_update_usegd(GDT_TLSMIN + n, &lwpd->br_tls[n]);
 645                 n++;
 646         }
 647 }
 648 
 649 void
 650 lx_set_gdt(int entry, user_desc_t *descrp)
 651 {
 652         /* Thin wrapper: hand entry and descrp straight to gdt_update_usegd. */
 653         gdt_update_usegd(entry, descrp);
 654 }
 655 
 656 void
 657 lx_clear_gdt(int entry)




   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2022 Joyent, Inc.
  28  */
  29 
  30 #include <sys/errno.h>
  31 #include <sys/systm.h>
  32 #include <sys/archsystm.h>
  33 #include <sys/privregs.h>
  34 #include <sys/exec.h>
  35 #include <sys/lwp.h>
  36 #include <sys/sem.h>
  37 #include <sys/brand.h>
  38 #include <sys/lx_brand.h>
  39 #include <sys/lx_misc.h>
  40 #include <sys/lx_siginfo.h>
  41 #include <sys/lx_futex.h>
  42 #include <lx_errno.h>
  43 #include <sys/lx_userhz.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/siginfo.h>
  46 #include <sys/contract/process_impl.h>
  47 #include <sys/x86_archext.h>
  48 #include <sys/sdt.h>
  49 #include <lx_signum.h>
  50 #include <lx_syscall.h>
  51 #include <sys/proc.h>
  52 #include <sys/procfs.h>
  53 #include <net/if.h>
  54 #include <inet/ip6.h>
  55 #include <sys/sunddi.h>
  56 #include <sys/dlpi.h>
  57 #include <sys/sysmacros.h>
  58 
  59 /* Context ops wired into lx_ctxop_template; both are defined below */
  60 static void lx_save(void *arg);
  61 static void lx_restore(void *arg);
  62 
  63 /* Context ops for LX lwps: lx_save on save and exit, lx_restore on restore. */
  64 static const struct ctxop_template lx_ctxop_template = {
  65         .ct_rev         = CTXOP_TPL_REV,
  66         .ct_save        = lx_save,
  67         .ct_restore     = lx_restore,
  68         .ct_exit        = lx_save,
  69 };
  70 
  71 /* Zero the return register for a forked child; v1 and v2 are unused. */
  72 /*ARGSUSED*/
  73 void
  74 lx_setrval(klwp_t *lwp, int v1, int v2)
  75 {
  76         struct regs *rp = lwptoregs(lwp);
  77 
  78         rp->r_r0 = 0;
  79 }
  80 
  81 /*
  82  * Reset process state on exec(2)
  83  */
  84 void
  85 lx_exec()
  86 {
  87         klwp_t *lwp = ttolwp(curthread);
  88         struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
  89         proc_t *p = ttoproc(curthread);
  90         lx_proc_data_t *pd = ptolxproc(p);


 111          * Inform ptrace(2) that we are processing an execve(2) call so that if
 112          * we are traced we can post either the PTRACE_EVENT_EXEC event or the
 113          * legacy SIGTRAP.
 114          */
 115         (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0, 0);
 116 
 117         /* clear the fs/gsbase values until the app. can reinitialize them */
 118         lwpd->br_lx_fsbase = (uintptr_t)NULL;
 119         lwpd->br_ntv_fsbase = (uintptr_t)NULL;
 120         lwpd->br_lx_gsbase = (uintptr_t)NULL;
 121         lwpd->br_ntv_gsbase = (uintptr_t)NULL;
 122 
 123         /*
 124          * Clear the native stack flags.  This will be reinitialised by
 125          * lx_init() in the new process image.
 126          */
 127         lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
 128         lwpd->br_ntv_stack = 0;
 129         lwpd->br_ntv_stack_current = 0;
 130 
 131         ctxop_install(lwptot(lwp), &lx_ctxop_template, lwp);

 132 
 133         /*
 134          * clear out the tls array
 135          */
 136         bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
 137 
 138         /*
 139          * reset the tls entries in the gdt
 140          */
 141         kpreempt_disable();
 142         lx_restore(lwp);
 143         kpreempt_enable();
 144 
 145         /*
 146          * The exec syscall doesn't return (so we don't call lx_syscall_return)
 147          * but for our ptrace emulation we need to do this so that a tracer
 148          * does not get out of sync. We know that by the time this lx_exec
 149          * function is called that the exec has succeeded.
 150          */
 151         rp->r_r0 = 0;


 336         } else {
 337                 mutex_exit(&lxzdata->lxzd_lock);
 338         }
 339 
 340         /*
 341          * It is possible for the lx_freelwp hook to be called without a prior
 342          * call to lx_exitlwp being made.  This happens as part of lwp
 343          * de-branding when a native binary is executed from a branded process.
 344          *
 345          * To cover all cases, lx_cleanlwp is called from lx_exitlwp as well
 346          * here in lx_freelwp.  When the second call is redundant, the
 347          * resources will already be freed and no work will be needed.
 348          */
 349         lx_cleanlwp(lwp, p);
 350 
 351         /*
 352          * Remove our system call interposer.
 353          */
 354         lwp->lwp_brand_syscall = NULL;
 355 
 356         /*
 357          * If this process is being de-branded during an exec(),
 358          * the LX ctxops may have already been removed, so the result
 359          * from ctxop_remove is irrelevant.
 360          */
 361         (void) ctxop_remove(lwptot(lwp), &lx_ctxop_template, lwp);
 362         if (lwpd->br_pid != 0) {
 363                 lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
 364         }
 365 
 366         /*
 367          * Discard the affinity mask.
 368          */
 369         VERIFY(lwpd->br_affinitymask != NULL);
 370         cpuset_free(lwpd->br_affinitymask);
 371         lwpd->br_affinitymask = NULL;
 372 
 373         /*
 374          * Ensure that lx_ptrace_exit() has been called to detach
 375          * ptrace(2) tracers and tracees.
 376          */
 377         VERIFY(lwpd->br_ptrace_tracer == NULL);
 378         VERIFY(lwpd->br_ptrace_accord == NULL);
 379 
 380         lwp->lwp_brand = NULL;
 381         kmem_free(lwpd, sizeof (struct lx_lwp_data));


 492          * held at that point, the only time we can guarantee a new pid isn't
 493          * needed is when p_lwpcnt == 0.  This is because other lwps won't be
 494          * present to race with us with regards to pid allocation.
 495          *
 496          * This means that in all other cases (where p_lwpcnt > 0), we expect
 497          * that lx_lwpdata_alloc will allocate a pid for us to use here, even
 498          * if it is unneeded.  If this process is undergoing an exec, for
 499          * example, the single existing lwp will not need a new pid when it is
 500          * rebranded.  In that case, lx_pid_assign will free the unneeded pid.
 501          */
 502         VERIFY(lwpd->br_lpid->lxp_pidp != NULL || p->p_lwpcnt == 0);
 503 
 504         lx_pid_assign(tp, lwpd->br_lpid);
 505         lwpd->br_tgid = lwpd->br_pid;
 506         /*
 507          * Having performed the lx pid assignment, the lpid reference is no
 508          * longer needed.  The underlying data will be freed during lx_freelwp.
 509          */
 510         lwpd->br_lpid = NULL;
 511 
 512         ctxop_install(lwptot(lwp), &lx_ctxop_template, lwp);

 513 
 514         /*
 515          * Install branded system call hooks for this LWP:
 516          */
 517         lwp->lwp_brand_syscall = lx_syscall_enter;
 518 
 519         /*
 520          * The new LWP inherits the parent LWP cgroup ID.
 521          */
 522         if (plwpd != NULL) {
 523                 lwpd->br_cgroupid = plwpd->br_cgroupid;
 524         }
 525         /*
 526          * The new LWP inherits the parent LWP emulated scheduling info.
 527          */
 528         if (plwpd != NULL) {
 529                 lwpd->br_schd_class = plwpd->br_schd_class;
 530                 lwpd->br_schd_pri = plwpd->br_schd_pri;
 531                 lwpd->br_schd_flags = plwpd->br_schd_flags;
 532                 lwpd->br_schd_runtime = plwpd->br_schd_runtime;


 606         lx_affinity_forklwp(srclwp, dstlwp);
 607 
 608         /*
 609          * Flag so child doesn't ptrace-stop on syscall exit.
 610          */
 611         dst->br_ptrace_flags |= LX_PTF_NOSTOP;
 612 
 613         if (src->br_clone_grp_flags != 0) {
 614                 lx_clone_grp_enter(src->br_clone_grp_flags, lwptoproc(srclwp),
 615                     lwptoproc(dstlwp));
 616                 /* clone group no longer pending on this thread */
 617                 src->br_clone_grp_flags = 0;
 618         }
 619 }
 620 
 621 /*
 622  * Context op (save and exit): when a Linux lwp is switched off the CPU,
 623  * point every LX TLS slot in the GDT at null_udesc.  arg is not used.
 624  */
 625 /* ARGSUSED */
 626 static void
 627 lx_save(void *arg)
 628 {
 629         int i;
 630 
 631 #if defined(__amd64)
 632         reset_sregs();
 633 #endif
 634         for (i = 0; i < LX_TLSNUM; i++)
 635                 gdt_update_usegd(GDT_TLSMIN + i, &null_udesc);
 636 }
 637 
 638 /*
 639  * Context op (restore): when a Linux lwp is switched onto the CPU, load
 640  * each of its saved TLS descriptors (br_tls) into its GDT slot.
 641  *
 642  * Nothing is done here for the 64-bit %fsbase; no explicit
 643  * wrmsr(MSR_AMD_FSBASE) is needed because update_sregs should handle it
 644  * when we are executing in user-land, in which case pcb_rupdate should be set.
 645  */
 646 static void
 647 lx_restore(void *arg)
 648 {
 649         klwp_t *lwp = arg;
 650         struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
 651         int n = 0;
 652 
 653         ASSERT(lwpd);
 654 
 655         while (n < LX_TLSNUM) {
 656                 gdt_update_usegd(GDT_TLSMIN + n, &lwpd->br_tls[n]);
 657                 n++;
 658         }
 659 }
 660 
 661 void
 662 lx_set_gdt(int entry, user_desc_t *descrp)
 663 {
 664         /* Thin wrapper: hand entry and descrp straight to gdt_update_usegd. */
 665         gdt_update_usegd(entry, descrp);
 666 }
 667 
 668 void
 669 lx_clear_gdt(int entry)