Print this page
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4144 panic in lx_freelwp during zone shutdown
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3140 In LX zone 'ps fax' does not show all processes
OS-3429 Expose zone's init exit status
OS-3149 lx brand always sends SIGCHLD to parent processes, regardless of how clone was invoked
OS-2887 lxbrand add WALL, WCLONE, WNOTHREAD support to waitid
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
        
*** 19,29 ****
   * CDDL HEADER END
   */
  
  /*
   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
   */
  
  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  
  #include <sys/types.h>
--- 19,29 ----
   * CDDL HEADER END
   */
  
  /*
   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright 2014 Joyent, Inc. All rights reserved.
   */
  
  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  
  #include <sys/types.h>
*** 228,238 ****
          lwp->lwp_extsig = 0;
          if (lwp->lwp_curinfo != NULL) {
                  siginfofree(lwp->lwp_curinfo);
                  lwp->lwp_curinfo = NULL;
          }
!         lwp_ctmpl_clear(lwp);
  
          /*
           * Reset both the process root directory and the current working
           * directory to the root of the zone just as we do during boot.
           */
--- 228,238 ----
          lwp->lwp_extsig = 0;
          if (lwp->lwp_curinfo != NULL) {
                  siginfofree(lwp->lwp_curinfo);
                  lwp->lwp_curinfo = NULL;
          }
!         lwp_ctmpl_clear(lwp, B_FALSE);
  
          /*
           * Reset both the process root directory and the current working
           * directory to the root of the zone just as we do during boot.
           */
*** 364,387 ****
                  (void) task_cpu_time_incr(p->p_task, p->p_ttime);
                  p->p_ttime = 0;
          }
          mutex_exit(&p->p_lock);
  
-         DTRACE_PROC(lwp__exit);
-         DTRACE_PROC1(exit, int, why);
- 
          /*
-          * Will perform any brand specific proc exit processing, since this
-          * is always the last lwp, will also perform lwp_exit and free brand
-          * data
-          */
-         if (PROC_IS_BRANDED(p)) {
-                 lwp_detach_brand_hdlrs(lwp);
-                 brand_clearbrand(p, B_FALSE);
-         }
- 
-         /*
           * Don't let init exit unless zone_start_init() failed its exec, or
           * we are shutting down the zone or the machine.
           *
           * Since we are single threaded, we don't need to lock the
           * following accesses to zone_proc_initpid.
--- 364,374 ----
*** 388,403 ****
           */
          if (p->p_pid == z->zone_proc_initpid) {
                  if (z->zone_boot_err == 0 &&
                      zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
                      zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
                          if (z->zone_restart_init == B_TRUE) {
                                  if (restart_init(what, why) == 0)
                                          return (0);
!                         } else {
                                  (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
!                                     CRED());
                          }
                  }
  
                  /*
                   * Since we didn't or couldn't restart init, we clear
--- 375,413 ----
           */
          if (p->p_pid == z->zone_proc_initpid) {
                  if (z->zone_boot_err == 0 &&
                      zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
                      zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
+ 
+                         /*
+                          * If the init process should be restarted, the
+                          * "zone_restart_init" member will be set.  Some init
+                          * programs in branded zones do not tolerate a restart
+                          * in the traditional manner; setting the
+                          * "zone_reboot_on_init_exit" member will cause the
+                          * entire zone to be rebooted instead.  If
+                          * "zone_restart_init" is not set, or the restart
+                          * attempt fails, the zone will shut down.
+                          */
+                         if (z->zone_reboot_on_init_exit == B_TRUE &&
+                             z->zone_restart_init == B_TRUE) {
+                                 /*
+                                  * Trigger a zone reboot and continue
+                                  * with exit processing.
+                                  */
+                                 z->zone_init_status = wstat(why, what);
+                                 (void) zone_kadmin(A_REBOOT, 0, NULL,
+                                     zone_kcred());
+ 
+                         } else {
                                  if (z->zone_restart_init == B_TRUE) {
                                          if (restart_init(what, why) == 0)
                                                  return (0);
!                                 }
! 
!                                 z->zone_init_status = wstat(why, what);
                                  (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
!                                     zone_kcred());
                          }
                  }
  
                  /*
                   * Since we didn't or couldn't restart init, we clear
*** 405,414 ****
--- 415,450 ----
                   * processing.
                   */
                  z->zone_proc_initpid = -1;
          }
  
+         /*
+          * Delay firing probes (and performing brand cleanup) until after the
+          * zone_proc_initpid check. Cases which result in zone shutdown or
+          * restart via zone_kadmin eventually result in a call back to
+          * proc_exit.
+          */
+         DTRACE_PROC(lwp__exit);
+         DTRACE_PROC1(exit, int, why);
+ 
+         /*
+          * Perform any brand-specific exit processing.  Since this is always
+          * the last lwp, that includes the brand's lwp exit and lwp free hooks
+          * as well as its proc exit hook.  Brand data will be freed when the
+          * process is reaped.
+          */
+         if (PROC_IS_BRANDED(p)) {
+                 BROP(p)->b_lwpexit(lwp);
+                 BROP(p)->b_proc_exit(p);
+                 /*
+                  * To ensure that b_proc_exit has access to brand-specific data
+                  * contained by the one remaining lwp, call the freelwp hook as
+                  * the last part of this clean-up process.
+                  */
+                 BROP(p)->b_freelwp(lwp);
+                 lwp_detach_brand_hdlrs(lwp);
+         }
+ 
          lwp_pcb_exit();
  
          /*
           * Allocate a sigqueue now, before we grab locks.
           * It will be given to sigcld(), below.
*** 656,669 ****
           * Don't try to assign init's children to init.
           */
          if ((q = p->p_child) != NULL && p != proc_init) {
                  struct proc     *np;
                  struct proc     *initp = proc_init;
                  boolean_t       setzonetop = B_FALSE;
  
!                 if (!INGLOBALZONE(curproc))
                          setzonetop = B_TRUE;
  
                  pgdetach(p);
  
                  do {
                          np = q->p_sibling;
--- 692,717 ----
           * Don't try to assign init's children to init.
           */
          if ((q = p->p_child) != NULL && p != proc_init) {
                  struct proc     *np;
                  struct proc     *initp = proc_init;
+                 pid_t           zone_initpid = 1;
+                 struct proc     *zoneinitp = NULL;
                  boolean_t       setzonetop = B_FALSE;
  
!                 if (!INGLOBALZONE(curproc)) {
!                         zone_initpid = curproc->p_zone->zone_proc_initpid;
! 
!                         ASSERT(MUTEX_HELD(&pidlock));
!                         zoneinitp = prfind(zone_initpid);
!                         if (zoneinitp != NULL) {
!                                 initp = zoneinitp;
!                         } else {
!                                 zone_initpid = 1;
                                  setzonetop = B_TRUE;
+                         }
+                 }
  
                  pgdetach(p);
  
                  do {
                          np = q->p_sibling;
*** 671,681 ****
                           * Delete it from its current parent new state
                           * list and add it to init new state list
                           */
                          delete_ns(q->p_parent, q);
  
!                         q->p_ppid = 1;
                          q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
                          if (setzonetop) {
                                  mutex_enter(&q->p_lock);
                                  q->p_flag |= SZONETOP;
                                  mutex_exit(&q->p_lock);
--- 719,730 ----
                           * Delete it from its current parent new state
                           * list and add it to init new state list
                           */
                          delete_ns(q->p_parent, q);
  
!                         q->p_ppid = zone_initpid;
! 
                          q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
                          if (setzonetop) {
                                  mutex_enter(&q->p_lock);
                                  q->p_flag |= SZONETOP;
                                  mutex_exit(&q->p_lock);
*** 845,856 ****
--- 894,947 ----
          else
                  t->t_procp = &p0;
  
          mutex_exit(&p->p_lock);
          if (!evaporate) {
+                 /*
+                  * The brand specific code only happens when the brand has a
+                  * function to call in place of sigcld and the parent of the
+                  * exiting process is not the global zone init. If the parent
+                  * is the global zone init, then the process was reparented,
+                  * and we don't want brand code delivering possibly strange
+                  * signals to init. Also, init is not branded, so any brand
+                  * specific exit data will not be picked up by init anyway.
+                  */
+                 if (PROC_IS_BRANDED(p) &&
+                     BROP(p)->b_exit_with_sig != NULL &&
+                     p->p_ppid != 1) {
+                         /*
+                          * The code for _fini that could unload the brand_t
+                          * blocks until the count of zones using the module
+                          * reaches zero. Zones decrement the refcount on their
+                          * brands only after all user tasks in that zone have
+                          * exited and been waited on. The decrement on the
+                          * brand's refcount happen in zone_destroy(). That
+                          * depends on zone_shutdown() having been completed.
+                          * zone_shutdown() includes a call to zone_empty(),
+                          * where the zone waits for itself to reach the state
+                          * ZONE_IS_EMPTY. This state is only set in either
+                          * zone_shutdown(), when there are no user processes as
+                          * the zone enters this function, or in
+                          * zone_task_rele(). zone_task_rele() is called from
+                          * code triggered by waiting on processes, not by the
+                          * processes exiting through proc_exit().  This means
+                          * all the branded processes that could exist for a
+                          * specific brand_t must exit and get reaped before the
+                          * refcount on the brand_t can reach 0. _fini will
+                          * never unload the corresponding brand module before
+                          * proc_exit finishes execution for all processes
+                          * branded with a particular brand_t, which makes the
+                          * operation below safe to do. Brands that wish to use
+                          * this mechanism must wait in _fini as described
+                          * above.
+                          */
+                         BROP(p)->b_exit_with_sig(p, sqp);
+                 } else {
                          p->p_pidflag &= ~CLDPEND;
                          sigcld(p, sqp);
+                 }
+ 
          } else {
                  /*
                   * Do what sigcld() would do if the disposition
                   * of the SIGCHLD signal were set to be ignored.
                   */
*** 925,938 ****
   * and pass back status from them.
   */
  int
  waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
  {
-         int found;
          proc_t *cp, *pp;
-         int proc_gone;
          int waitflag = !(options & WNOWAIT);
  
          /*
           * Obsolete flag, defined here only for binary compatibility
           * with old statically linked executables.  Delete this when
           * we no longer care about these old and broken applications.
--- 1016,1028 ----
   * and pass back status from them.
   */
  int
  waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
  {
          proc_t *cp, *pp;
          int waitflag = !(options & WNOWAIT);
+         boolean_t have_brand_helper = B_FALSE;
  
          /*
           * Obsolete flag, defined here only for binary compatibility
           * with old statically linked executables.  Delete this when
           * we no longer care about these old and broken applications.
*** 956,966 ****
          }
  
          pp = ttoproc(curthread);
  
          /*
!          * lock parent mutex so that sibling chain can be searched.
           */
          mutex_enter(&pidlock);
  
          /*
           * if we are only looking for exited processes and child_ns list
--- 1046,1057 ----
          }
  
          pp = ttoproc(curthread);
  
          /*
!          * Anytime you are looking for a process, you take pidlock to prevent
!          * things from changing as you look.
           */
          mutex_enter(&pidlock);
  
          /*
           * if we are only looking for exited processes and child_ns list
*** 976,996 ****
                  }
                  mutex_exit(&pidlock);
                  return (ECHILD);
          }
  
!         while (pp->p_child != NULL) {
  
!                 proc_gone = 0;
  
                  for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
                          if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
                                  continue;
                          if (idtype == P_PID && id != cp->p_pid)
                                  continue;
                          if (idtype == P_PGID && id != cp->p_pgrp)
                                  continue;
  
                          switch (cp->p_wcode) {
  
                          case CLD_TRAPPED:
                          case CLD_STOPPED:
--- 1067,1119 ----
                  }
                  mutex_exit(&pidlock);
                  return (ECHILD);
          }
  
!         if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
!                 have_brand_helper = B_TRUE;
!         }
  
!         while (pp->p_child != NULL || have_brand_helper) {
!                 boolean_t brand_wants_wait = B_FALSE;
!                 int proc_gone = 0;
!                 int found = 0;
  
+                 /*
+                  * Give the brand a chance to return synthetic results from
+                  * this waitid() call before we do the real thing.
+                  */
+                 if (have_brand_helper) {
+                         int ret;
+ 
+                         if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+                             &brand_wants_wait, &ret) == 0) {
+                                 mutex_exit(&pidlock);
+                                 return (ret);
+                         }
+ 
+                         if (pp->p_child == NULL) {
+                                 goto no_real_children;
+                         }
+                 }
+ 
+                 /*
+                  * Look for interesting children in the newstate list.
+                  */
+                 VERIFY(pp->p_child != NULL);
                  for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
                          if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
                                  continue;
                          if (idtype == P_PID && id != cp->p_pid)
                                  continue;
                          if (idtype == P_PGID && id != cp->p_pgrp)
                                  continue;
+                         if (PROC_IS_BRANDED(pp)) {
+                                 if (BROP(pp)->b_wait_filter != NULL &&
+                                     BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+                                         continue;
+                         }
  
                          switch (cp->p_wcode) {
  
                          case CLD_TRAPPED:
                          case CLD_STOPPED:
*** 1031,1046 ****
  
                  /*
                   * Wow! None of the threads on the p_sibling_ns list were
                   * interesting threads. Check all the kids!
                   */
-                 found = 0;
                  for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
                          if (idtype == P_PID && id != cp->p_pid)
                                  continue;
                          if (idtype == P_PGID && id != cp->p_pgrp)
                                  continue;
  
                          switch (cp->p_wcode) {
                          case CLD_TRAPPED:
                                  if (!(options & WTRAPPED))
                                          break;
--- 1154,1173 ----
  
                  /*
                   * Wow! None of the threads on the p_sibling_ns list were
                   * interesting threads. Check all the kids!
                   */
                  for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
                          if (idtype == P_PID && id != cp->p_pid)
                                  continue;
                          if (idtype == P_PGID && id != cp->p_pgrp)
                                  continue;
+                         if (PROC_IS_BRANDED(pp)) {
+                                 if (BROP(pp)->b_wait_filter != NULL &&
+                                     BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+                                         continue;
+                         }
  
                          switch (cp->p_wcode) {
                          case CLD_TRAPPED:
                                  if (!(options & WTRAPPED))
                                          break;
*** 1105,1119 ****
  
                          if (idtype == P_PID)
                                  break;
                  }
  
                  /*
                   * If we found no interesting processes at all,
                   * break out and return ECHILD.
                   */
!                 if (found + proc_gone == 0)
                          break;
  
                  if (options & WNOHANG) {
                          mutex_exit(&pidlock);
                          bzero(ip, sizeof (k_siginfo_t));
--- 1232,1247 ----
  
                          if (idtype == P_PID)
                                  break;
                  }
  
+ no_real_children:
                  /*
                   * If we found no interesting processes at all,
                   * break out and return ECHILD.
                   */
!                 if (!brand_wants_wait && (found + proc_gone == 0))
                          break;
  
                  if (options & WNOHANG) {
                          mutex_exit(&pidlock);
                          bzero(ip, sizeof (k_siginfo_t));
*** 1128,1138 ****
                  /*
                   * If we found no processes of interest that could
                   * change state while we wait, we don't wait at all.
                   * Get out with ECHILD according to SVID.
                   */
!                 if (found == proc_gone)
                          break;
  
                  if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
                          mutex_exit(&pidlock);
                          return (EINTR);
--- 1256,1266 ----
                  /*
                   * If we found no processes of interest that could
                   * change state while we wait, we don't wait at all.
                   * Get out with ECHILD according to SVID.
                   */
!                 if (!brand_wants_wait && (found == proc_gone))
                          break;
  
                  if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
                          mutex_exit(&pidlock);
                          return (EINTR);
*** 1224,1233 ****
--- 1352,1367 ----
          if (p->p_killsqp) {
                  siginfofree(p->p_killsqp);
                  p->p_killsqp = NULL;
          }
  
+         /* Clear any remaining brand data */
+         if (PROC_IS_BRANDED(p)) {
+                 brand_clearbrand(p, B_FALSE);
+         }
+ 
+ 
          prfree(p);      /* inform /proc */
  
          /*
           * Don't free the init processes.
           * Other dying processes will access it.