Print this page
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4144 panic in lx_freelwp during zone shutdown
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3140 In LX zone 'ps fax' does not show all processes
OS-3429 Expose zone's init exit status
OS-3149 lx brand always sends SIGCHLD to parent processes, regardless of how clone was invoked
OS-2887 lxbrand add WALL, WCLONE, WNOTHREAD support to waitid
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

@@ -19,11 +19,11 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
  */
 
 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
 
 #include <sys/types.h>

@@ -228,11 +228,11 @@
         lwp->lwp_extsig = 0;
         if (lwp->lwp_curinfo != NULL) {
                 siginfofree(lwp->lwp_curinfo);
                 lwp->lwp_curinfo = NULL;
         }
-        lwp_ctmpl_clear(lwp);
+        lwp_ctmpl_clear(lwp, B_FALSE);
 
         /*
          * Reset both the process root directory and the current working
          * directory to the root of the zone just as we do during boot.
          */

@@ -364,24 +364,11 @@
                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
                 p->p_ttime = 0;
         }
         mutex_exit(&p->p_lock);
 
-        DTRACE_PROC(lwp__exit);
-        DTRACE_PROC1(exit, int, why);
-
         /*
-         * Will perform any brand specific proc exit processing, since this
-         * is always the last lwp, will also perform lwp_exit and free brand
-         * data
-         */
-        if (PROC_IS_BRANDED(p)) {
-                lwp_detach_brand_hdlrs(lwp);
-                brand_clearbrand(p, B_FALSE);
-        }
-
-        /*
          * Don't let init exit unless zone_start_init() failed its exec, or
          * we are shutting down the zone or the machine.
          *
          * Since we are single threaded, we don't need to lock the
          * following accesses to zone_proc_initpid.

@@ -388,16 +375,39 @@
          */
         if (p->p_pid == z->zone_proc_initpid) {
                 if (z->zone_boot_err == 0 &&
                     zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
                     zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
+
+                        /*
+                         * If the init process should be restarted, the
+                         * "zone_restart_init" member will be set.  Some init
+                         * programs in branded zones do not tolerate a restart
+                         * in the traditional manner; setting the
+                         * "zone_reboot_on_init_exit" member will cause the
+                         * entire zone to be rebooted instead.  If neither of
+                         * these flags is set the zone will shut down.
+                         */
+                        if (z->zone_reboot_on_init_exit == B_TRUE &&
+                            z->zone_restart_init == B_TRUE) {
+                                /*
+                                 * Trigger a zone reboot and continue
+                                 * with exit processing.
+                                 */
+                                z->zone_init_status = wstat(why, what);
+                                (void) zone_kadmin(A_REBOOT, 0, NULL,
+                                    zone_kcred());
+
+                        } else {
                         if (z->zone_restart_init == B_TRUE) {
                                 if (restart_init(what, why) == 0)
                                         return (0);
-                        } else {
+                                }
+
+                                z->zone_init_status = wstat(why, what);
                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
-                                    CRED());
+                                    zone_kcred());
                         }
                 }
 
                 /*
                  * Since we didn't or couldn't restart init, we clear

@@ -405,10 +415,36 @@
                  * processing.
                  */
                 z->zone_proc_initpid = -1;
         }
 
+        /*
+         * Delay firing probes (and performing brand cleanup) until after the
+         * zone_proc_initpid check. Cases which result in zone shutdown or
+         * restart via zone_kadmin eventually result in a call back to
+         * proc_exit.
+         */
+        DTRACE_PROC(lwp__exit);
+        DTRACE_PROC1(exit, int, why);
+
+        /*
+         * Will perform any brand specific proc exit processing. Since this
+         * is always the last lwp, will also perform lwp exit/free and proc
+         * exit. Brand data will be freed when the process is reaped.
+         */
+        if (PROC_IS_BRANDED(p)) {
+                BROP(p)->b_lwpexit(lwp);
+                BROP(p)->b_proc_exit(p);
+                /*
+                 * To ensure that b_proc_exit has access to brand-specific data
+                 * contained by the one remaining lwp, call the freelwp hook as
+                 * the last part of this clean-up process.
+                 */
+                BROP(p)->b_freelwp(lwp);
+                lwp_detach_brand_hdlrs(lwp);
+        }
+
         lwp_pcb_exit();
 
         /*
          * Allocate a sigqueue now, before we grab locks.
          * It will be given to sigcld(), below.

@@ -656,14 +692,26 @@
          * Don't try to assign init's children to init.
          */
         if ((q = p->p_child) != NULL && p != proc_init) {
                 struct proc     *np;
                 struct proc     *initp = proc_init;
+                pid_t           zone_initpid = 1;
+                struct proc     *zoneinitp = NULL;
                 boolean_t       setzonetop = B_FALSE;
 
-                if (!INGLOBALZONE(curproc))
+                if (!INGLOBALZONE(curproc)) {
+                        zone_initpid = curproc->p_zone->zone_proc_initpid;
+
+                        ASSERT(MUTEX_HELD(&pidlock));
+                        zoneinitp = prfind(zone_initpid);
+                        if (zoneinitp != NULL) {
+                                initp = zoneinitp;
+                        } else {
+                                zone_initpid = 1;
                         setzonetop = B_TRUE;
+                        }
+                }
 
                 pgdetach(p);
 
                 do {
                         np = q->p_sibling;

@@ -671,11 +719,12 @@
                          * Delete it from its current parent new state
                          * list and add it to init new state list
                          */
                         delete_ns(q->p_parent, q);
 
-                        q->p_ppid = 1;
+                        q->p_ppid = zone_initpid;
+
                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
                         if (setzonetop) {
                                 mutex_enter(&q->p_lock);
                                 q->p_flag |= SZONETOP;
                                 mutex_exit(&q->p_lock);

@@ -845,12 +894,54 @@
         else
                 t->t_procp = &p0;
 
         mutex_exit(&p->p_lock);
         if (!evaporate) {
+                /*
+                 * The brand specific code only happens when the brand has a
+                 * function to call in place of sigcld and the parent of the
+                 * exiting process is not the global zone init. If the parent
+                 * is the global zone init, then the process was reparented,
+                 * and we don't want brand code delivering possibly strange
+                 * signals to init. Also, init is not branded, so any brand
+                 * specific exit data will not be picked up by init anyway.
+                 */
+                if (PROC_IS_BRANDED(p) &&
+                    BROP(p)->b_exit_with_sig != NULL &&
+                    p->p_ppid != 1) {
+                        /*
+                         * The code for _fini that could unload the brand_t
+                         * blocks until the count of zones using the module
+                         * reaches zero. Zones decrement the refcount on their
+                         * brands only after all user tasks in that zone have
+                         * exited and been waited on. The decrement on the
+                         * brand's refcount happens in zone_destroy(). That
+                         * depends on zone_shutdown() having been completed.
+                         * zone_shutdown() includes a call to zone_empty(),
+                         * where the zone waits for itself to reach the state
+                         * ZONE_IS_EMPTY. This state is only set in either
+                         * zone_shutdown(), when there are no user processes as
+                         * the zone enters this function, or in
+                         * zone_task_rele(). zone_task_rele() is called from
+                         * code triggered by waiting on processes, not by the
+                         * processes exiting through proc_exit().  This means
+                         * all the branded processes that could exist for a
+                         * specific brand_t must exit and get reaped before the
+                         * refcount on the brand_t can reach 0. _fini will
+                         * never unload the corresponding brand module before
+                         * proc_exit finishes execution for all processes
+                         * branded with a particular brand_t, which makes the
+                         * operation below safe to do. Brands that wish to use
+                         * this mechanism must wait in _fini as described
+                         * above.
+                         */
+                        BROP(p)->b_exit_with_sig(p, sqp);
+                } else {
                 p->p_pidflag &= ~CLDPEND;
                 sigcld(p, sqp);
+                }
+
         } else {
                 /*
                  * Do what sigcld() would do if the disposition
                  * of the SIGCHLD signal were set to be ignored.
                  */

@@ -925,14 +1016,13 @@
  * and pass back status from them.
  */
 int
 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 {
-        int found;
         proc_t *cp, *pp;
-        int proc_gone;
         int waitflag = !(options & WNOWAIT);
+        boolean_t have_brand_helper = B_FALSE;
 
         /*
          * Obsolete flag, defined here only for binary compatibility
          * with old statically linked executables.  Delete this when
          * we no longer care about these old and broken applications.

@@ -956,11 +1046,12 @@
         }
 
         pp = ttoproc(curthread);
 
         /*
-         * lock parent mutex so that sibling chain can be searched.
+         * Anytime you are looking for a process, you take pidlock to prevent
+         * things from changing as you look.
          */
         mutex_enter(&pidlock);
 
         /*
          * if we are only looking for exited processes and child_ns list

@@ -976,21 +1067,53 @@
                 }
                 mutex_exit(&pidlock);
                 return (ECHILD);
         }
 
-        while (pp->p_child != NULL) {
+        if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+                have_brand_helper = B_TRUE;
+        }
 
-                proc_gone = 0;
+        while (pp->p_child != NULL || have_brand_helper) {
+                boolean_t brand_wants_wait = B_FALSE;
+                int proc_gone = 0;
+                int found = 0;
 
+                /*
+                 * Give the brand a chance to return synthetic results from
+                 * this waitid() call before we do the real thing.
+                 */
+                if (have_brand_helper) {
+                        int ret;
+
+                        if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+                            &brand_wants_wait, &ret) == 0) {
+                                mutex_exit(&pidlock);
+                                return (ret);
+                        }
+
+                        if (pp->p_child == NULL) {
+                                goto no_real_children;
+                        }
+                }
+
+                /*
+                 * Look for interesting children in the newstate list.
+                 */
+                VERIFY(pp->p_child != NULL);
                 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
                         if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
                                 continue;
                         if (idtype == P_PID && id != cp->p_pid)
                                 continue;
                         if (idtype == P_PGID && id != cp->p_pgrp)
                                 continue;
+                        if (PROC_IS_BRANDED(pp)) {
+                                if (BROP(pp)->b_wait_filter != NULL &&
+                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+                                        continue;
+                        }
 
                         switch (cp->p_wcode) {
 
                         case CLD_TRAPPED:
                         case CLD_STOPPED:

@@ -1031,16 +1154,20 @@
 
                 /*
                  * Wow! None of the processes on the p_sibling_ns list were
                  * interesting processes. Check all the kids!
                  */
-                found = 0;
                 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
                         if (idtype == P_PID && id != cp->p_pid)
                                 continue;
                         if (idtype == P_PGID && id != cp->p_pgrp)
                                 continue;
+                        if (PROC_IS_BRANDED(pp)) {
+                                if (BROP(pp)->b_wait_filter != NULL &&
+                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+                                        continue;
+                        }
 
                         switch (cp->p_wcode) {
                         case CLD_TRAPPED:
                                 if (!(options & WTRAPPED))
                                         break;

@@ -1105,15 +1232,16 @@
 
                         if (idtype == P_PID)
                                 break;
                 }
 
+no_real_children:
                 /*
                  * If we found no interesting processes at all,
                  * break out and return ECHILD.
                  */
-                if (found + proc_gone == 0)
+                if (!brand_wants_wait && (found + proc_gone == 0))
                         break;
 
                 if (options & WNOHANG) {
                         mutex_exit(&pidlock);
                         bzero(ip, sizeof (k_siginfo_t));

@@ -1128,11 +1256,11 @@
                 /*
                  * If we found no processes of interest that could
                  * change state while we wait, we don't wait at all.
                  * Get out with ECHILD according to SVID.
                  */
-                if (found == proc_gone)
+                if (!brand_wants_wait && (found == proc_gone))
                         break;
 
                 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
                         mutex_exit(&pidlock);
                         return (EINTR);

@@ -1224,10 +1352,16 @@
         if (p->p_killsqp) {
                 siginfofree(p->p_killsqp);
                 p->p_killsqp = NULL;
         }
 
+        /* Clear any remaining brand data */
+        if (PROC_IS_BRANDED(p)) {
+                brand_clearbrand(p, B_FALSE);
+        }
+
+
         prfree(p);      /* inform /proc */
 
         /*
          * Don't free the init processes.
          * Other dying processes will access it.