Print this page
14019 Allow more control over zone init exit actions (fix mismerge)
14019 Allow more control over zone init exit actions
Portions contributed by: Joshua M. Clulow <jmc@joyent.com>
Portions contributed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Gordon Ross <Gordon.W.Ross@gmail.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/exit.c
          +++ new/usr/src/uts/common/os/exit.c
↓ open down ↓ 158 lines elided ↑ open up ↑
 159  159          }
 160  160  
 161  161          nvlist_free(nvl);
 162  162  }
 163  163  
 164  164  /*
 165  165   * Called by proc_exit() when a zone's init exits, presumably because
 166  166   * it failed.  As long as the given zone is still in the "running"
 167  167   * state, we will re-exec() init, but first we need to reset things
 168  168   * which are usually inherited across exec() but will break init's
 169      - * assumption that it is being exec()'d from a virgin process.  Most
      169 + * assumption that it is being exec()'d from a virgin process.  Most
 170  170   * importantly this includes closing all file descriptors (exec only
 171  171   * closes those marked close-on-exec) and resetting signals (exec only
 172  172   * resets handled signals, and we need to clear any signals which
 173  173   * killed init).  Anything else that exec(2) says would be inherited,
 174  174   * but would affect the execution of init, needs to be reset.
 175  175   */
 176  176  static int
 177  177  restart_init(int what, int why)
 178  178  {
 179  179          kthread_t *t = curthread;
↓ open down ↓ 156 lines elided ↑ open up ↑
 336  336   * Enter zombie state.
 337  337   * Wake up parent and init processes,
 338  338   * and dispose of children.
 339  339   */
 340  340  void
 341  341  exit(int why, int what)
 342  342  {
 343  343          /*
 344  344           * If proc_exit() fails, then some other lwp in the process
 345  345           * got there first.  We just have to call lwp_exit() to allow
 346      -         * the other lwp to finish exiting the process.  Otherwise we're
      346 +         * the other lwp to finish exiting the process.  Otherwise we're
 347  347           * restarting init, and should return.
 348  348           */
 349  349          if (proc_exit(why, what) != 0) {
 350  350                  mutex_enter(&curproc->p_lock);
 351  351                  ASSERT(curproc->p_flag & SEXITLWPS);
 352  352                  lwp_exit();
 353  353                  /* NOTREACHED */
 354  354          }
 355  355  }
 356  356  
 357  357  /*
 358  358   * Set the SEXITING flag on the process, after making sure /proc does
 359      - * not have it locked.  This is done in more places than proc_exit(),
      359 + * not have it locked.  This is done in more places than proc_exit(),
 360  360   * so it is a separate function.
 361  361   */
 362  362  void
 363  363  proc_is_exiting(proc_t *p)
 364  364  {
 365  365          mutex_enter(&p->p_lock);
 366  366          prbarrier(p);
 367  367          p->p_flag |= SEXITING;
 368  368          mutex_exit(&p->p_lock);
 369  369  }
↓ open down ↓ 94 lines elided ↑ open up ↑
 464  464                  }
 465  465          } else {
 466  466                  /*
 467  467                   * No restart modifiers on the zone, attempt to restart init.
 468  468                   */
 469  469                  if (restart_init(what, why) == 0) {
 470  470                          return (B_TRUE);
 471  471                  }
 472  472          }
 473  473  
 474      -
 475  474          /*
 476      -         * The restart failed, the zone will shut down.
      475 +         * The restart failed, or the criteria for a restart are not met;
      476 +         * the zone will shut down.
 477  477           */
 478  478          z->zone_init_status = wstat(why, what);
 479  479          (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
 480  480          z->zone_proc_initpid = -1;
 481  481          return (B_FALSE);
 482  482  }
 483  483  
 484  484  /*
 485  485   * Return value:
 486  486   *   1 - exitlwps() failed, call (or continue) lwp_exit()
↓ open down ↓ 16 lines elided ↑ open up ↑
 503  503          uint_t lwpdir_sz;
 504  504          tidhash_t *tidhash;
 505  505          uint_t tidhash_sz;
 506  506          ret_tidhash_t *ret_tidhash;
 507  507          refstr_t *cwd;
 508  508          hrtime_t hrutime, hrstime;
 509  509          int evaporate;
 510  510  
 511  511          /*
 512  512           * Stop and discard the process's lwps except for the current one,
 513      -         * unless some other lwp beat us to it.  If exitlwps() fails then
      513 +         * unless some other lwp beat us to it.  If exitlwps() fails then
 514  514           * return and the calling lwp will call (or continue in) lwp_exit().
 515  515           */
 516  516          proc_is_exiting(p);
 517  517          if (exitlwps(0) != 0)
 518  518                  return (1);
 519  519  
 520  520          mutex_enter(&p->p_lock);
 521  521          if (p->p_ttime > 0) {
 522  522                  /*
 523  523                   * Account any remaining ticks charged to this process
 524  524                   * on its way out.
 525  525                   */
 526  526                  (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 527  527                  p->p_ttime = 0;
 528  528          }
 529  529          mutex_exit(&p->p_lock);
 530  530  
      531 +        /*
      532 +         * Don't let init exit unless zone_start_init() failed its exec, or
      533 +         * we are shutting down the zone or the machine.
      534 +         *
      535 +         * Since we are single threaded, we don't need to lock the
      536 +         * following accesses to zone_proc_initpid.
      537 +         */
 531  538          if (p->p_pid == z->zone_proc_initpid) {
 532  539                  /* If zone's init restarts, we're done here. */
 533  540                  if (zone_init_exit(z, why, what))
 534  541                          return (0);
 535  542          }
 536  543  
 537  544          /*
 538  545           * Delay firing probes (and performing brand cleanup) until after the
 539  546           * zone_proc_initpid check. Cases which result in zone shutdown or
 540  547           * restart via zone_kadmin eventually result in a call back to
↓ open down ↓ 457 lines elided ↑ open up ↑
 998 1005                  exitpctx(p);
 999 1006                  kpreempt_enable();
1000 1007  
1001 1008                  freepctx(p, 0);
1002 1009          }
1003 1010  
1004 1011          /*
1005 1012           * curthread's proc pointer is changed to point to the 'sched'
1006 1013           * process for the corresponding zone, except in the case when
1007 1014           * the exiting process is in fact a zsched instance, in which
1008      -         * case the proc pointer is set to p0.  We do so, so that the
     1015 +         * case the proc pointer is set to p0.  We do so, so that the
1009 1016           * process still points at the right zone when we call the VN_RELE()
1010 1017           * below.
1011 1018           *
1012 1019           * This is because curthread's original proc pointer can be freed as
1013 1020           * soon as the child sends a SIGCLD to its parent.  We use zsched so
1014 1021           * that for user processes, even in the final moments of death, the
1015 1022           * process is still associated with its zone.
1016 1023           */
1017 1024          if (p != t->t_procp->p_zone->zone_zsched)
1018 1025                  t->t_procp = t->t_procp->p_zone->zone_zsched;
↓ open down ↓ 63 lines elided ↑ open up ↑
1082 1089          if (cdir)
1083 1090                  VN_RELE(cdir);
1084 1091          if (rdir)
1085 1092                  VN_RELE(rdir);
1086 1093          if (cwd)
1087 1094                  refstr_rele(cwd);
1088 1095  
1089 1096          /*
1090 1097           * task_rele() may ultimately cause the zone to go away (or
1091 1098           * may cause the last user process in a zone to go away, which
1092      -         * signals zsched to go away).  So prior to this call, we must
     1099 +         * signals zsched to go away).  So prior to this call, we must
1093 1100           * no longer point at zsched.
1094 1101           */
1095 1102          t->t_procp = &p0;
1096 1103  
1097 1104          kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
1098 1105          kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
1099 1106          while (ret_tidhash != NULL) {
1100 1107                  ret_tidhash_t *next = ret_tidhash->rth_next;
1101 1108                  kmem_free(ret_tidhash->rth_tidhash,
1102 1109                      ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
↓ open down ↓ 507 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX