1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2013 Gary Mills
  24  *
  25  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40 
  41 /*
  42  * init(1M) is the general process spawning program.  Its primary job is to
  43  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
  44  * spawns and respawns processes according to /etc/inittab and the current
  45  * run-level.  It reads /etc/default/init for general configuration.
  46  *
  47  * To change run-levels the system administrator runs init from the command
  48  * line with a level name.  init signals svc.startd via libscf and directs the
  49  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
  50  * these signal numbers are commonly referred to in the code as 'states'.  Valid
  51  * run-levels are [sS0123456].  Additionally, init can be given directives
  52  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
  53  *
  54  * When init processes inittab entries, it finds processes that are to be
  55  * spawned at various run-levels.  inittab contains the set of the levels for
  56  * which each inittab entry is valid.
  57  *
  58  * State File and Restartability
  59  *   Premature exit by init(1M) is handled as a special case by the kernel:
  60  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
  61  *   1 in the global zone.)  To track the processes it has previously spawned,
  62  *   as well as other mutable state, init(1M) regularly updates a state file
  63  *   such that its subsequent invocations have knowledge of its various
  64  *   dependent processes and duties.
  65  *
  66  * Process Contracts
  67  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
  68  *   restarting it.  Everything else is started using the legacy contract
  69  *   template, and the created contracts are abandoned when they become empty.
  70  *
  71  * utmpx Entry Handling
  72  *   Because init(1M) no longer governs the startup process, its knowledge of
  73  *   when utmpx becomes writable is indirect.  However, spawned processes
  74  *   expect to be constructed with valid utmpx entries.  As a result, attempts
  75  *   to write normal entries will be retried until successful.
  76  *
  77  * Maintenance Mode
  78  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
  79  *   which it invokes sulogin(1M) to allow the operator an opportunity to
  80  *   repair the system.  Normally, this operation is performed as a
  81  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
  82  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
  83  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
  84  *   restart init(1M) on exit from the operator session.
  85  *
  86  *   One scenario where init(1M) enters its maintenance mode is when
  87  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
  88  *   between recent failures drops below a given threshold.
  89  */
  90 
  91 #include <sys/contract/process.h>
  92 #include <sys/ctfs.h>
  93 #include <sys/stat.h>
  94 #include <sys/statvfs.h>
  95 #include <sys/stropts.h>
  96 #include <sys/systeminfo.h>
  97 #include <sys/time.h>
  98 #include <sys/termios.h>
  99 #include <sys/tty.h>
 100 #include <sys/types.h>
 101 #include <sys/utsname.h>
 102 
 103 #include <bsm/adt_event.h>
 104 #include <bsm/libbsm.h>
 105 #include <security/pam_appl.h>
 106 
 107 #include <assert.h>
 108 #include <ctype.h>
 109 #include <dirent.h>
 110 #include <errno.h>
 111 #include <fcntl.h>
 112 #include <libcontract.h>
 113 #include <libcontract_priv.h>
 114 #include <libintl.h>
 115 #include <libscf.h>
 116 #include <libscf_priv.h>
 117 #include <poll.h>
 118 #include <procfs.h>
 119 #include <signal.h>
 120 #include <stdarg.h>
 121 #include <stdio.h>
 122 #include <stdio_ext.h>
 123 #include <stdlib.h>
 124 #include <string.h>
 125 #include <strings.h>
 126 #include <syslog.h>
 127 #include <time.h>
 128 #include <ulimit.h>
 129 #include <unistd.h>
 130 #include <utmpx.h>
 131 #include <wait.h>
 132 #include <zone.h>
 133 #include <ucontext.h>
 134 
 135 #undef  sleep
 136 
 137 #define fioctl(p, sptr, cmd)    ioctl(fileno(p), sptr, cmd)
 138 #define min(a, b)               (((a) < (b)) ? (a) : (b))
 139 
 140 #define TRUE    1
 141 #define FALSE   0
 142 #define FAILURE -1
 143 
 144 #define UT_USER_SZ      32      /* Size of a utmpx ut_user field */
 145 #define UT_LINE_SZ      32      /* Size of a utmpx ut_line field */
 146 
 147 /*
 148  * SLEEPTIME    The number of seconds "init" sleeps between wakeups if
 149  *              nothing else requires this "init" wakeup.
 150  */
 151 #define SLEEPTIME       (5 * 60)
 152 
 153 /*
 154  * MAXCMDL      The maximum length of a command string in inittab.
 155  */
 156 #define MAXCMDL 512
 157 
 158 /*
 159  * EXEC         The length of the prefix string added to all commands
 160  *              found in inittab.
 161  */
 162 #define EXEC    (sizeof ("exec ") - 1)
 163 
 164 /*
 165  * TWARN        The amount of time between warning signal, SIGTERM,
 166  *              and the fatal kill signal, SIGKILL.
 167  */
 168 #define TWARN   5
 169 
 170 #define id_eq(x, y)     ((x[0] == y[0] && x[1] == y[1] && x[2] == y[2] &&\
 171                         x[3] == y[3]) ? TRUE : FALSE)
 172 
 173 /*
 174  * The kernel's default umask is 022 these days; since some processes inherit
 175  * their umask from init, init will set it from CMASK in /etc/default/init.
 176  * init gets the default umask from the kernel; it sets it to 022 whenever
 177  * it wants to create a file and reverts to CMASK afterwards.
 178  */
 179 
 180 static int cmask;
 181 
 182 /*
 183  * The following definitions, concluding with the 'lvls' array, provide a
 184  * common mapping between level-name (like 'S'), signal number (state),
 185  * run-level mask, and specific properties associated with a run-level.
 186  * This array should be accessed using the routines lvlname_to_state(),
 187  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
 188  */
 189 
 190 /*
 191  * Correspondence of signals to init actions.
 192  */
 193 #define LVLQ            SIGHUP
 194 #define LVL0            SIGINT
 195 #define LVL1            SIGQUIT
 196 #define LVL2            SIGILL
 197 #define LVL3            SIGTRAP
 198 #define LVL4            SIGIOT
 199 #define LVL5            SIGEMT
 200 #define LVL6            SIGFPE
 201 #define SINGLE_USER     SIGBUS
 202 #define LVLa            SIGSEGV
 203 #define LVLb            SIGSYS
 204 #define LVLc            SIGPIPE
 205 
 206 /*
 207  * Bit Mask for each level.  Used to determine legal levels.
 208  */
 209 #define MASK0   0x0001
 210 #define MASK1   0x0002
 211 #define MASK2   0x0004
 212 #define MASK3   0x0008
 213 #define MASK4   0x0010
 214 #define MASK5   0x0020
 215 #define MASK6   0x0040
 216 #define MASKSU  0x0080
 217 #define MASKa   0x0100
 218 #define MASKb   0x0200
 219 #define MASKc   0x0400
 220 
 221 #define MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
 222 #define MASK_abc (MASKa | MASKb | MASKc)
 223 
 224 /*
 225  * Flags to indicate properties of various states.
 226  */
 227 #define LSEL_RUNLEVEL   0x0001  /* runlevels you can transition to */
 228 
 229 typedef struct lvl {        /* one row of the lvls[] level mapping table */
 230         int     lvl_state;      /* signal number used as the state (LVL*) */
 231         int     lvl_mask;       /* run-level bit (MASK*); 0 for 'Q'/'q' */
 232         char    lvl_name;       /* level name character, e.g. 'S' or '3' */
 233         int     lvl_flags;      /* state properties: LSEL_RUNLEVEL or 0 */
 234 } lvl_t;
 235 
 236 static lvl_t lvls[] = {
 237         { LVLQ,         0,      'Q', 0                                  },
 238         { LVLQ,         0,      'q', 0                                  },
 239         { LVL0,         MASK0,  '0', LSEL_RUNLEVEL                      },
 240         { LVL1,         MASK1,  '1', LSEL_RUNLEVEL                      },
 241         { LVL2,         MASK2,  '2', LSEL_RUNLEVEL                      },
 242         { LVL3,         MASK3,  '3', LSEL_RUNLEVEL                      },
 243         { LVL4,         MASK4,  '4', LSEL_RUNLEVEL                      },
 244         { LVL5,         MASK5,  '5', LSEL_RUNLEVEL                      },
 245         { LVL6,         MASK6,  '6', LSEL_RUNLEVEL                      },
 246         { SINGLE_USER,  MASKSU, 'S', LSEL_RUNLEVEL                      },
 247         { SINGLE_USER,  MASKSU, 's', LSEL_RUNLEVEL                      },
 248         { LVLa,         MASKa,  'a', 0                                  },
 249         { LVLb,         MASKb,  'b', 0                                  },
 250         { LVLc,         MASKc,  'c', 0                                  }
 251 };
 252 
 253 #define LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
 254 
 255 /*
 256  * Legal action field values.
 257  */
 258 #define OFF             0       /* Kill process if on, else ignore */
 259 #define RESPAWN         1       /* Continuously restart process when it dies */
 260 #define ONDEMAND        RESPAWN /* Respawn for a, b, c type processes */
 261 #define ONCE            2       /* Start process, do not respawn when dead */
 262 #define WAIT            3       /* Perform once and wait to complete */
 263 #define BOOT            4       /* Start at boot time only */
 264 #define BOOTWAIT        5       /* Start at boot time and wait to complete */
 265 #define POWERFAIL       6       /* Start on powerfail */
 266 #define POWERWAIT       7       /* Start and wait for complete on powerfail */
 267 #define INITDEFAULT     8       /* Default level "init" should start at */
 268 #define SYSINIT         9       /* Actions performed before init speaks */
 269 
 270 #define M_OFF           0001
 271 #define M_RESPAWN       0002
 272 #define M_ONDEMAND      M_RESPAWN
 273 #define M_ONCE          0004
 274 #define M_WAIT          0010
 275 #define M_BOOT          0020
 276 #define M_BOOTWAIT      0040
 277 #define M_PF            0100
 278 #define M_PWAIT         0200
 279 #define M_INITDEFAULT   0400
 280 #define M_SYSINIT       01000
 281 
 282 /* States for the inittab parser in getcmd(). */
 283 #define ID      1
 284 #define LEVELS  2
 285 #define ACTION  3
 286 #define COMMAND 4
 287 #define COMMENT 5
 288 
 289 /*
 290  * inittab entry id constants
 291  */
 292 #define INITTAB_ENTRY_ID_SIZE 4
 293 #define INITTAB_ENTRY_ID_STR_FORMAT "%.4s"      /* if INITTAB_ENTRY_ID_SIZE */
 294                                                 /* changes, this should */
 295                                                 /* change accordingly */
 296 
 297 /*
 298  * Init can be in any of three main states, "normal" mode where it is
 299  * processing entries for the lines file in a normal fashion, "boot" mode,
 300  * where it is only interested in the boot actions, and "powerfail" mode,
 301  * where it is only interested in powerfail related actions. The following
 302  * masks declare the legal actions for each mode.
 303  */
 304 #define NORMAL_MODES    (M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
 305 #define BOOT_MODES      (M_BOOT | M_BOOTWAIT)
 306 #define PF_MODES        (M_PF | M_PWAIT)
 307 
 308 struct PROC_TABLE {
 309         char    p_id[INITTAB_ENTRY_ID_SIZE];    /* Four letter unique id of */
 310                                                 /* process */
 311         pid_t   p_pid;          /* Process id */
 312         short   p_count;        /* How many respawns of this command in */
 313                                 /*   the current series */
 314         long    p_time;         /* Start time for a series of respawns */
 315         short   p_flags;
 316         short   p_exit;         /* Exit status of a process which died */
 317 };
 318 
 319 /*
 320  * Flags for the "p_flags" word of a PROC_TABLE entry:
 321  *
 322  *      OCCUPIED        This slot in init's proc table is in use.
 323  *
 324  *      LIVING          Process is alive.
 325  *
 326  *      NOCLEANUP       efork() is not allowed to cleanup this entry even
 327  *                      if process is dead.
 328  *
 329  *      NAMED           This process has a name, i.e. came from inittab.
 330  *
 331  *      DEMANDREQUEST   Process started by a "telinit [abc]" command.  Processes
 332  *                      formed this way are respawnable and immune to level
 333  *                      changes as long as their entry exists in inittab.
 334  *
 335  *      TOUCHED         Flag used by remv() to determine whether it has looked
 336  *                      at an entry while checking for processes to be killed.
 337  *
 338  *      WARNED          Flag used by remv() to mark processes that have been
 339  *                      sent the SIGTERM signal.  If they don't die in 5
 340  *                      seconds, they are sent the SIGKILL signal.
 341  *
 342  *      KILLED          Flag used by remv() to mark procs that have been sent
 343  *                      the SIGTERM and SIGKILL signals.
 344  *
 345  *      PF_MASK         Bitwise or of legal flags, for sanity checking.
 346  */
 347 #define OCCUPIED        01
 348 #define LIVING          02
 349 #define NOCLEANUP       04
 350 #define NAMED           010
 351 #define DEMANDREQUEST   020
 352 #define TOUCHED         040
 353 #define WARNED          0100
 354 #define KILLED          0200
 355 #define PF_MASK         0377
 356 
 357 /*
 358  * Respawn limits for processes that are to be respawned:
 359  *
 360  *      SPAWN_INTERVAL  The number of seconds over which "init" will try to
 361  *                      respawn a process SPAWN_LIMIT times before it gets mad.
 362  *
 363  *      SPAWN_LIMIT     The number of respawns "init" will attempt in
 364  *                      SPAWN_INTERVAL seconds before it generates an
 365  *                      error message and inhibits further tries for
 366  *                      INHIBIT seconds.
 367  *
 368  *      INHIBIT         The number of seconds "init" ignores an entry it had
 369  *                      trouble spawning unless a "telinit Q" is received.
 370  */
 371 
 372 #define SPAWN_INTERVAL  (2*60)
 373 #define SPAWN_LIMIT     10
 374 #define INHIBIT         (5*60)
 375 
 376 /*
 377  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
 378  */
 379 #define ID_MAX_STR_LEN  10
 380 
 381 #define NULLPROC        ((struct PROC_TABLE *)(0))
 382 #define NO_ROOM         ((struct PROC_TABLE *)(FAILURE))
 383 
 384 struct CMD_LINE {
 385         char c_id[INITTAB_ENTRY_ID_SIZE];       /* Four letter unique id of */
 386                                                 /* process to be affected by */
 387                                                 /* action */
 388         short c_levels; /* Mask of legal levels for process */
 389         short c_action; /* Mask for type of action required */
 390         char *c_command; /* Pointer to init command */
 391 };
 392 
 393 struct  pidrec {
 394         int     pd_type;        /* Command type */
 395         pid_t   pd_pid;         /* pid to add or remove */
 396 };
 397 
 398 /*
 399  * pd_type's
 400  */
 401 #define ADDPID  1
 402 #define REMPID  2
 403 
 404 static struct   pidlist {
 405         pid_t   pl_pid;         /* pid to watch for */
 406         int     pl_dflag;       /* Flag indicating SIGCLD from this pid */
 407         short   pl_exit;        /* Exit status of proc */
 408         struct  pidlist *pl_next; /* Next in list */
 409 } *Plhead, *Plfree;
 410 
 411 /*
 412  * The following structure contains a set of modes for /dev/syscon
 413  * and should match the default contents of /etc/ioctl.syscon.
 414  */
 415 static struct termios   dflt_termios = {
 416         .c_iflag = BRKINT|ICRNL|IXON|IMAXBEL,
 417         .c_oflag = OPOST|ONLCR|TAB3,
 418         .c_cflag = CS8|CREAD|B9600,
 419         .c_lflag = ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN,
 420         .c_cc = { CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
 421             CSTART, CSTOP, CSWTCH, CDSUSP, CRPRNT, CFLUSH, CWERASE, CLNEXT,
 422             CSTATUS, CERASE2, 0
 423         }
 424 };
 425 
 426 static struct termios   stored_syscon_termios;
 427 static int              write_ioctl = 0;        /* Rewrite /etc/ioctl.syscon */
 428 
 429 static union WAKEUP {
 430         struct WAKEFLAGS {
 431                 unsigned w_usersignal : 1;      /* User sent signal to "init" */
 432                 unsigned w_childdeath : 1;      /* An "init" child died */
 433                 unsigned w_powerhit : 1;        /* OS experienced powerfail */
 434         }       w_flags;
 435         int w_mask;
 436 } wakeup;
 437 
 438 
 439 struct init_state {         /* init's mutable state, reached via g_state */
 440         int                     ist_runlevel;   /* current state; see cur_state */
 441         int                     ist_num_proc;   /* see num_proc; presumably the slot count of ist_proc_table - confirm */
 442         int                     ist_utmpx_ok;   /* see utmpx_ok; presumably nonzero once utmpx is writable - confirm */
 443         struct PROC_TABLE       ist_proc_table[1]; /* see proc_table; NOTE(review): declared with one element, presumably over-allocated (cf. g_state_sz) - confirm */
 444 };
 445 
 446 #define cur_state       (g_state->ist_runlevel)
 447 #define num_proc        (g_state->ist_num_proc)
 448 #define proc_table      (g_state->ist_proc_table)
 449 #define utmpx_ok        (g_state->ist_utmpx_ok)
 450 
 451 /* Contract cookies. */
 452 #define ORDINARY_COOKIE         0
 453 #define STARTD_COOKIE           1
 454 
 455 
 456 #ifndef NDEBUG
 457 #define bad_error(func, err)    {                                       \
 458         (void) fprintf(stderr, "%s:%d: %s() failed with unexpected "    \
 459             "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
 460         abort();                                                        \
 461 }
 462 #else
 463 #define bad_error(func, err)    abort()
 464 #endif
 465 
 466 
 467 /*
 468  * Useful file and device names.
 469  */
 470 static char *CONSOLE      = "/dev/console";     /* Real system console */
 471 static char *INITPIPE_DIR = "/var/run";
 472 static char *INITPIPE     = "/var/run/initpipe";
 473 
 474 #define INIT_STATE_DIR "/etc/svc/volatile"
 475 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
 476 static const char * const init_next_state_file =
 477         INIT_STATE_DIR "/init-next.state";
 478 
 479 static const int init_num_proc = 20;    /* Initial size of process table. */
 480 
 481 static char *UTMPX       = UTMPX_FILE;          /* Snapshot record file */
 482 static char *WTMPX       = WTMPX_FILE;          /* Long term record file */
 483 static char *INITTAB     = "/etc/inittab";      /* Script file for "init" */
 484 static char *SYSTTY      = "/dev/systty";       /* System Console */
 485 static char *SYSCON      = "/dev/syscon";       /* Virtual System console */
 486 static char *IOCTLSYSCON = "/etc/ioctl.syscon"; /* Last syscon modes */
 487 static char *ENVFILE     = "/etc/default/init"; /* Default env. */
 488 static char *SU = "/etc/sulogin";       /* Super-user program for single user */
 489 static char *SH = "/sbin/sh";           /* Standard shell */
 490 
 491 /*
 492  * Default Path.  /sbin is included in path only during sysinit phase
 493  */
 494 #define DEF_PATH        "PATH=/usr/sbin:/usr/bin"
 495 #define INIT_PATH       "PATH=/sbin:/usr/sbin:/usr/bin"
 496 
 497 static int      prior_state;
 498 static int      prev_state;     /* State "init" was in last time it woke */
 499 static int      new_state;      /* State user wants "init" to go to. */
 500 static int      lvlq_received;  /* Explicit request to examine state */
 501 static int      op_modes = BOOT_MODES; /* Current state of "init" */
 502 static int      Gchild = 0;     /* Flag to indicate "godchild" died, set in */
 503                                 /*   childeath() and cleared in cleanaux() */
 504 static int      Pfd = -1;       /* fd to receive pids thru */
 505 static unsigned int     spawncnt, pausecnt;
 506 static int      rsflag;         /* Set if a respawn has taken place */
 507 static volatile int time_up;    /* Flag set to TRUE by the alarm interrupt */
 508                                 /* routine each time an alarm interrupt */
 509                                 /* takes place. */
 510 static int      sflg = 0;       /* Set if we were booted -s to single user */
 511 static int      rflg = 0;       /* Set if booted -r, reconfigure devices */
 512 static int      bflg = 0;       /* Set if booted -b, don't run rc scripts */
 513 static pid_t    init_pid;       /* PID of "one true" init for current zone */
 514 
 515 static struct init_state *g_state = NULL;
 516 static size_t   g_state_sz;
 517 static int      booting = 1;    /* Set while we're booting. */
 518 
 519 /*
 520  * Array for default global environment.
 521  */
 522 #define MAXENVENT       24      /* Max number of default env variables + 1 */
 523                                 /* init can use three itself, so this leaves */
 524                                 /* 20 for the administrator in ENVFILE. */
 525 static char     *glob_envp[MAXENVENT];  /* Array of environment strings */
 526 static int      glob_envn;              /* Number of environment strings */
 527 
 528 
 529 static struct pollfd    poll_fds[1];
 530 static int              poll_nfds = 0;  /* poll_fds is uninitialized */
 531 
 532 /*
 533  * Contracts constants
 534  */
 535 #define SVC_INIT_PREFIX "init:/"
 536 #define SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
 537 #define SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
 538 
 539 static int      legacy_tmpl = -1;       /* fd for legacy contract template */
 540 static int      startd_tmpl = -1;       /* fd for svc.startd's template */
 541 static char     startd_svc_aux[SVC_AUX_SIZE];
 542 
 543 static char     startd_cline[256] = ""; /* svc.startd's command line */
 544 static int      do_restart_startd = 1;  /* Whether to restart svc.startd. */
 545 static char     *smf_options = NULL;    /* Options to give to startd. */
 546 static int      smf_debug = 0;          /* Messages for debugging smf(5) */
 547 static time_t   init_boot_time;         /* Substitute for kernel boot time. */
 548 
 549 #define NSTARTD_FAILURE_TIMES   3               /* trigger after 3 failures */
 550 #define STARTD_FAILURE_RATE_NS  5000000000LL    /* 1 failure/5 seconds */
 551 
 552 static hrtime_t startd_failure_time[NSTARTD_FAILURE_TIMES];
 553 static uint_t   startd_failure_index;
 554 
 555 
 556 static char     *prog_name(char *);
 557 static int      state_to_mask(int);
 558 static int      lvlname_to_mask(char, int *);
 559 static void     lscf_set_runlevel(char);
 560 static int      state_to_flags(int);
 561 static char     state_to_name(int);
 562 static int      lvlname_to_state(char);
 563 static int      getcmd(struct CMD_LINE *, char *);
 564 static int      realcon();
 565 static int      spawn_processes();
 566 static int      get_ioctl_syscon();
 567 static int      account(short, struct PROC_TABLE *, char *);
 568 static void     alarmclk();
 569 static void     childeath(int);
 570 static void     cleanaux();
 571 static void     clearent(pid_t, short);
 572 static void     console(boolean_t, char *, ...);
 573 static void     init_signals(void);
 574 static void     setup_pipe();
 575 static void     killproc(pid_t);
 576 static void     init_env();
 577 static void     boot_init();
 578 static void     powerfail();
 579 static void     remv();
 580 static void     write_ioctl_syscon();
 581 static void     spawn(struct PROC_TABLE *, struct CMD_LINE *);
 582 static void     setimer(int);
 583 static void     siglvl(int, siginfo_t *, ucontext_t *);
 584 static void     sigpoll(int);
 585 static void     enter_maintenance(void);
 586 static void     timer(int);
 587 static void     userinit(int, char **);
 588 static void     notify_pam_dead(struct utmpx *);
 589 static long     waitproc(struct PROC_TABLE *);
 590 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
 591 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
 592 static void     increase_proc_table_size();
 593 static void     st_init();
 594 static void     st_write();
 595 static void     contracts_init();
 596 static void     contract_event(struct pollfd *);
 597 static int      startd_run(const char *, int, ctid_t);
 598 static void     startd_record_failure();
 599 static int      startd_failure_rate_critical();
 600 static char     *audit_boot_msg();
 601 static int      audit_put_record(int, int, char *);
 602 static void     update_boot_archive(int new_state);
 603 
 604 int
 605 main(int argc, char *argv[])
 606 {
 607         int     chg_lvl_flag = FALSE, print_banner = FALSE;
 608         int     may_need_audit = 1;
 609         int     c;
 610         char    *msg;
 611 
 612         /* Get a timestamp for use as boot time, if needed. */
 613         (void) time(&init_boot_time);
 614 
 615         /* Get the default umask */
 616         cmask = umask(022);
 617         (void) umask(cmask);
 618 
 619         /* Parse the arguments to init. Check for single user */
 620         opterr = 0;
 621         while ((c = getopt(argc, argv, "brsm:")) != EOF) {
 622                 switch (c) {
 623                 case 'b':
 624                         rflg = 0;
 625                         bflg = 1;
 626                         if (!sflg)
 627                                 sflg++;
 628                         break;
 629                 case 'r':
 630                         bflg = 0;
 631                         rflg++;
 632                         break;
 633                 case 's':
 634                         if (!bflg)
 635                                 sflg++;
 636                         break;
 637                 case 'm':
 638                         smf_options = optarg;
 639                         smf_debug = (strstr(smf_options, "debug") != NULL);
 640                         break;
 641                 }
 642         }
 643 
 644         /*
 645          * Determine if we are the main init, or a user invoked init, whose job
 646          * it is to inform init to change levels or perform some other action.
 647          */
 648         if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
 649             sizeof (init_pid)) != sizeof (init_pid)) {
 650                 (void) fprintf(stderr, "could not get pid for init\n");
 651                 return (1);
 652         }
 653 
 654         /*
 655          * If this PID is not the same as the "true" init for the zone, then we
 656          * must be in 'user' mode.
 657          */
 658         if (getpid() != init_pid) {
 659                 userinit(argc, argv);
 660         }
 661 
 662         if (getzoneid() != GLOBAL_ZONEID) {
 663                 print_banner = TRUE;
 664         }
 665 
 666         /*
 667          * Initialize state (and set "booting").
 668          */
 669         st_init();
 670 
 671         if (booting && print_banner) {
 672                 struct utsname un;
 673                 char buf[BUFSIZ], *isa;
 674                 long ret;
 675                 int bits = 32;
 676 
 677                 /*
 678                  * We want to print the boot banner as soon as
 679                  * possible.  In the global zone, the kernel does it,
 680                  * but we do not have that luxury in non-global zones,
 681                  * so we will print it here.
 682                  */
 683                 (void) uname(&un);
 684                 ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
 685                 if (ret != -1L && ret <= sizeof (buf)) {
 686                         for (isa = strtok(buf, " "); isa;
 687                             isa = strtok(NULL, " ")) {
 688                                 if (strcmp(isa, "sparcv9") == 0 ||
 689                                     strcmp(isa, "amd64") == 0) {
 690                                         bits = 64;
 691                                         break;
 692                                 }
 693                         }
 694                 }
 695 
 696                 console(B_FALSE,
 697                     "\n\n%s Release %s Version %s %d-bit\r\n",
 698                     un.sysname, un.release, un.version, bits);
 699                 console(B_FALSE,
 700                     "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
 701                     " All rights reserved.\r\n");
 702                 console(B_FALSE, "Copyright 2019 Nexenta Systems, Inc.\r\n");
 703         }
 704 
 705         /*
 706          * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
 707          * so that it can be brought up in the state it was in when the
 708          * system went down; or set to defaults if ioctl.syscon isn't
 709          * valid.
 710          *
 711          * This needs to be done even if we're restarting so reset_modes()
 712          * will work in case we need to go down to single user mode.
 713          */
 714         write_ioctl = get_ioctl_syscon();
 715 
 716         /*
 717          * Set up all signals to be caught or ignored as appropriate.
 718          */
 719         init_signals();
 720 
 721         /* Load glob_envp from ENVFILE. */
 722         init_env();
 723 
 724         contracts_init();
 725 
 726         if (!booting) {
 727                 /* cur_state should have been read in. */
 728 
 729                 op_modes = NORMAL_MODES;
 730 
 731                 /* Rewrite the ioctl file if it was bad. */
 732                 if (write_ioctl)
 733                         write_ioctl_syscon();
 734         } else {
 735                 /*
 736                  * It's fine to boot up with state as zero, because
 737                  * startd will later tell us the real state.
 738                  */
 739                 cur_state = 0;
 740                 op_modes = BOOT_MODES;
 741 
 742                 boot_init();
 743         }
 744 
 745         prev_state = prior_state = cur_state;
 746 
 747         setup_pipe();
 748 
 749         /*
 750          * Here is the beginning of the main process loop.
 751          */
 752         for (;;) {
 753                 if (lvlq_received) {
 754                         setup_pipe();
 755                         lvlq_received = B_FALSE;
 756                 }
 757 
 758                 /*
 759                  * Clean up any accounting records for dead "godchildren".
 760                  */
 761                 if (Gchild)
 762                         cleanaux();
 763 
 764                 /*
 765                  * If in "normal" mode, check all living processes and initiate
 766                  * kill sequence on those that should not be there anymore.
 767                  */
 768                 if (op_modes == NORMAL_MODES && cur_state != LVLa &&
 769                     cur_state != LVLb && cur_state != LVLc)
 770                         remv();
 771 
 772                 /*
 773                  * If a change in run levels is the reason we awoke, now do
 774                  * the accounting to report the change in the utmp file.
 775                  * Also report the change on the system console.
 776                  */
 777                 if (chg_lvl_flag) {
 778                         chg_lvl_flag = FALSE;
 779 
 780                         if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
 781                                 char rl = state_to_name(cur_state);
 782 
 783                                 if (rl != -1)
 784                                         lscf_set_runlevel(rl);
 785                         }
 786 
 787                         may_need_audit = 1;
 788                 }
 789 
 790                 /*
 791                  * Scan the inittab file and spawn and respawn processes that
 792                  * should be alive in the current state. If inittab does not
                 * exist, default to single user mode.
 794                  */
 795                 if (spawn_processes() == FAILURE) {
 796                         prior_state = prev_state;
 797                         cur_state = SINGLE_USER;
 798                 }
 799 
 800                 /* If any respawns occurred, take note. */
 801                 if (rsflag) {
 802                         rsflag = 0;
 803                         spawncnt++;
 804                 }
 805 
 806                 /*
 807                  * If a powerfail signal was received during the last
 808                  * sequence, set mode to powerfail.  When spawn_processes() is
 809                  * entered the first thing it does is to check "powerhit".  If
 810                  * it is in PF_MODES then it clears "powerhit" and does
 811                  * a powerfail sequence.  If it is not in PF_MODES, then it
 812                  * puts itself in PF_MODES and then clears "powerhit".  Should
 813                  * "powerhit" get set again while spawn_processes() is working
                 * on a powerfail sequence, the following code will see that
 815                  * spawn_processes() tries to execute the powerfail sequence
 816                  * again.  This guarantees that the powerfail sequence will be
 817                  * successfully completed before further processing takes
 818                  * place.
 819                  */
 820                 if (wakeup.w_flags.w_powerhit) {
 821                         op_modes = PF_MODES;
 822                         /*
 823                          * Make sure that cur_state != prev_state so that
 824                          * ONCE and WAIT types work.
 825                          */
 826                         prev_state = 0;
 827                 } else if (op_modes != NORMAL_MODES) {
 828                         /*
 829                          * If spawn_processes() was not just called while in
 830                          * normal mode, we set the mode to normal and it will
 831                          * be called again to check normal modes.  If we have
 832                          * just finished a powerfail sequence with prev_state
 833                          * equal to zero, we set prev_state equal to cur_state
 834                          * before the next pass through.
 835                          */
 836                         if (op_modes == PF_MODES)
 837                                 prev_state = cur_state;
 838                         op_modes = NORMAL_MODES;
 839                 } else if (cur_state == LVLa || cur_state == LVLb ||
 840                     cur_state == LVLc) {
 841                         /*
 842                          * If it was a change of levels that awakened us and the
 843                          * new level is one of the demand levels then reset
 844                          * cur_state to the previous state and do another scan
 845                          * to take care of the usual respawn actions.
 846                          */
 847                         cur_state = prior_state;
 848                         prior_state = prev_state;
 849                         prev_state = cur_state;
 850                 } else {
 851                         prev_state = cur_state;
 852 
 853                         if (wakeup.w_mask == 0) {
 854                                 int ret;
 855 
 856                                 if (may_need_audit && (cur_state == LVL3)) {
 857                                         msg = audit_boot_msg();
 858 
 859                                         may_need_audit = 0;
 860                                         (void) audit_put_record(ADT_SUCCESS,
 861                                             ADT_SUCCESS, msg);
 862                                         free(msg);
 863                                 }
 864 
 865                                 /*
 866                                  * "init" is finished with all actions for
 867                                  * the current wakeup.
 868                                  */
 869                                 ret = poll(poll_fds, poll_nfds,
 870                                     SLEEPTIME * MILLISEC);
 871                                 pausecnt++;
 872                                 if (ret > 0)
 873                                         contract_event(&poll_fds[0]);
 874                                 else if (ret < 0 && errno != EINTR)
 875                                         console(B_TRUE, "poll() error: %s\n",
 876                                             strerror(errno));
 877                         }
 878 
 879                         if (wakeup.w_flags.w_usersignal) {
 880                                 /*
 881                                  * Install the new level.  This could be a real
                                 * change in levels or a telinit [Q|a|b|c] or
 883                                  * just a telinit to the same level at which
 884                                  * we are running.
 885                                  */
 886                                 if (new_state != cur_state) {
 887                                         if (new_state == LVLa ||
 888                                             new_state == LVLb ||
 889                                             new_state == LVLc) {
 890                                                 prev_state = prior_state;
 891                                                 prior_state = cur_state;
 892                                                 cur_state = new_state;
 893                                         } else {
 894                                                 prev_state = cur_state;
 895                                                 if (cur_state >= 0)
 896                                                         prior_state = cur_state;
 897                                                 cur_state = new_state;
 898                                                 chg_lvl_flag = TRUE;
 899                                         }
 900                                 }
 901 
 902                                 new_state = 0;
 903                         }
 904 
 905                         if (wakeup.w_flags.w_powerhit)
 906                                 op_modes = PF_MODES;
 907 
 908                         /*
 909                          * Clear all wakeup reasons.
 910                          */
 911                         wakeup.w_mask = 0;
 912                 }
 913         }
 914 
 915         /*NOTREACHED*/
 916 }
 917 
 918 static void
 919 update_boot_archive(int new_state)
 920 {
 921         if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
 922                 return;
 923 
 924         if (getzoneid() != GLOBAL_ZONEID)
 925                 return;
 926 
 927         (void) system("/sbin/bootadm -ea update_all");
 928 }
 929 
 930 /*
 931  * void enter_maintenance()
 932  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
 933  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
 934  *   we wait for it to exit.
 935  */
 936 static void
 937 enter_maintenance()
 938 {
 939         struct PROC_TABLE       *su_process;
 940 
 941         console(B_FALSE, "Requesting maintenance mode\n"
 942             "(See /lib/svc/share/README for additional information.)\n");
 943         (void) sighold(SIGCLD);
 944         while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
 945                 (void) pause();
 946         (void) sigrelse(SIGCLD);
 947         if (su_process == NULLPROC) {
 948                 int fd;
 949 
 950                 (void) fclose(stdin);
 951                 (void) fclose(stdout);
 952                 (void) fclose(stderr);
 953                 closefrom(0);
 954 
 955                 fd = open(SYSCON, O_RDWR | O_NOCTTY);
 956                 if (fd >= 0) {
 957                         (void) dup2(fd, 1);
 958                         (void) dup2(fd, 2);
 959                 } else {
 960                         /*
 961                          * Need to issue an error message somewhere.
 962                          */
 963                         syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
 964                             getpid(), SYSCON, strerror(errno));
 965                 }
 966 
 967                 /*
 968                  * Execute the "su" program.
 969                  */
 970                 (void) execle(SU, SU, "-", (char *)0, glob_envp);
 971                 console(B_TRUE, "execle of %s failed: %s\n", SU,
 972                     strerror(errno));
 973                 timer(5);
 974                 exit(1);
 975         }
 976 
 977         /*
 978          * If we are the parent, wait around for the child to die
 979          * or for "init" to be signaled to change levels.
 980          */
 981         while (waitproc(su_process) == FAILURE) {
 982                 /*
 983                  * All other reasons for waking are ignored when in
 984                  * single-user mode.  The only child we are interested
 985                  * in is being waited for explicitly by waitproc().
 986                  */
 987                 wakeup.w_mask = 0;
 988         }
 989 }
 990 
 991 /*
 992  * remv() scans through "proc_table" and performs cleanup.  If
 993  * there is a process in the table, which shouldn't be here at
 994  * the current run level, then remv() kills the process.
 995  */
 996 static void
 997 remv()
 998 {
 999         struct PROC_TABLE       *process;
1000         struct CMD_LINE         cmd;
1001         char                    cmd_string[MAXCMDL];
1002         int                     change_level;
1003 
1004         change_level = (cur_state != prev_state ? TRUE : FALSE);
1005 
1006         /*
1007          * Clear the TOUCHED flag on all entries so that when we have
1008          * finished scanning inittab, we will be able to tell if we
1009          * have any processes for which there is no entry in inittab.
1010          */
1011         for (process = proc_table;
1012             (process < proc_table + num_proc); process++) {
1013                 process->p_flags &= ~TOUCHED;
1014         }
1015 
1016         /*
1017          * Scan all inittab entries.
1018          */
1019         while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1020                 /* Scan for process which goes with this entry in inittab. */
1021                 for (process = proc_table;
1022                     (process < proc_table + num_proc); process++) {
1023                         if ((process->p_flags & OCCUPIED) == 0 ||
1024                             !id_eq(process->p_id, cmd.c_id))
1025                                 continue;
1026 
1027                         /*
1028                          * This slot contains the process we are looking for.
1029                          */
1030 
1031                         /*
1032                          * Is the cur_state SINGLE_USER or is this process
1033                          * marked as "off" or was this proc started by some
1034                          * mechanism other than LVL{a|b|c} and the current level
1035                          * does not support this process?
1036                          */
1037                         if (cur_state == SINGLE_USER ||
1038                             cmd.c_action == M_OFF ||
1039                             ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1040                             (process->p_flags & DEMANDREQUEST) == 0)) {
1041                                 if (process->p_flags & LIVING) {
1042                                         /*
1043                                          * Touch this entry so we know we have
1044                                          * treated it.  Note that procs which
1045                                          * are already dead at this point and
1046                                          * should not be restarted are left
1047                                          * untouched.  This causes their slot to
1048                                          * be freed later after dead accounting
1049                                          * is done.
1050                                          */
1051                                         process->p_flags |= TOUCHED;
1052 
1053                                         if ((process->p_flags & KILLED) == 0) {
1054                                                 if (change_level) {
1055                                                         process->p_flags
1056                                                             |= WARNED;
1057                                                         (void) kill(
1058                                                             process->p_pid,
1059                                                             SIGTERM);
1060                                                 } else {
1061                                                         /*
1062                                                          * Fork a killing proc
1063                                                          * so "init" can
1064                                                          * continue without
1065                                                          * having to pause for
1066                                                          * TWARN seconds.
1067                                                          */
1068                                                         killproc(
1069                                                             process->p_pid);
1070                                                 }
1071                                                 process->p_flags |= KILLED;
1072                                         }
1073                                 }
1074                         } else {
1075                                 /*
1076                                  * Process can exist at current level.  If it is
1077                                  * still alive or a DEMANDREQUEST we touch it so
1078                                  * it will be left alone.  Otherwise we leave it
1079                                  * untouched so it will be accounted for and
1080                                  * cleaned up later in remv().  Dead
1081                                  * DEMANDREQUESTs will be accounted but not
1082                                  * freed.
1083                                  */
1084                                 if (process->p_flags &
1085                                     (LIVING|NOCLEANUP|DEMANDREQUEST))
1086                                         process->p_flags |= TOUCHED;
1087                         }
1088 
1089                         break;
1090                 }
1091         }
1092 
1093         st_write();
1094 
1095         /*
1096          * If this was a change of levels call, scan through the
1097          * process table for processes that were warned to die.  If any
1098          * are found that haven't left yet, sleep for TWARN seconds and
1099          * then send final terminations to any that haven't died yet.
1100          */
1101         if (change_level) {
1102 
1103                 /*
1104                  * Set the alarm for TWARN seconds on the assumption
1105                  * that there will be some that need to be waited for.
1106                  * This won't harm anything except we are guaranteed to
1107                  * wakeup in TWARN seconds whether we need to or not.
1108                  */
1109                 setimer(TWARN);
1110 
1111                 /*
1112                  * Scan for processes which should be dying.  We hope they
1113                  * will die without having to be sent a SIGKILL signal.
1114                  */
1115                 for (process = proc_table;
1116                     (process < proc_table + num_proc); process++) {
1117                         /*
1118                          * If this process should die, hasn't yet, and the
1119                          * TWARN time hasn't expired yet, wait for process
1120                          * to die or for timer to expire.
1121                          */
1122                         while (time_up == FALSE &&
1123                             (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1124                             (WARNED|LIVING|OCCUPIED))
1125                                 (void) pause();
1126 
1127                         if (time_up == TRUE)
1128                                 break;
1129                 }
1130 
1131                 /*
1132                  * If we reached the end of the table without the timer
1133                  * expiring, then there are no procs which will have to be
1134                  * sent the SIGKILL signal.  If the timer has expired, then
1135                  * it is necessary to scan the table again and send signals
1136                  * to all processes which aren't going away nicely.
1137                  */
1138                 if (time_up == TRUE) {
1139                         for (process = proc_table;
1140                             (process < proc_table + num_proc); process++) {
1141                                 if ((process->p_flags &
1142                                     (WARNED|LIVING|OCCUPIED)) ==
1143                                     (WARNED|LIVING|OCCUPIED))
1144                                         (void) kill(process->p_pid, SIGKILL);
1145                         }
1146                 }
1147                 setimer(0);
1148         }
1149 
1150         /*
1151          * Rescan the proc_table for two kinds of entry, those marked LIVING,
1152          * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1153          * by the above scanning), and haven't been sent kill signals, and
1154          * those entries marked not LIVING, NAMED.  The former procs are killed.
1155          * The latter have DEAD_PROCESS accounting done and the slot cleared.
1156          */
1157         for (process = proc_table;
1158             (process < proc_table + num_proc); process++) {
1159                 if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1160                     == (LIVING|NAMED|OCCUPIED)) {
1161                         killproc(process->p_pid);
1162                         process->p_flags |= KILLED;
1163                 } else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1164                     (NAMED|OCCUPIED)) {
1165                         (void) account(DEAD_PROCESS, process, NULL);
1166                         /*
1167                          * If this named proc hasn't been TOUCHED, then free the
1168                          * space. It has either died of it's own accord, but
1169                          * isn't respawnable or it was killed because it
1170                          * shouldn't exist at this level.
1171                          */
1172                         if ((process->p_flags & TOUCHED) == 0)
1173                                 process->p_flags = 0;
1174                 }
1175         }
1176 
1177         st_write();
1178 }
1179 
1180 /*
1181  * Extract the svc.startd command line and whether to restart it from its
1182  * inittab entry.
1183  */
1184 /*ARGSUSED*/
1185 static void
1186 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1187 {
1188         size_t sz;
1189 
1190         /* Save the command line. */
1191         if (sflg || rflg) {
1192                 /* Also append -r or -s. */
1193                 (void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1194                 (void) strlcat(startd_cline, " -", sizeof (startd_cline));
1195                 if (sflg)
1196                         sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1197                 if (rflg)
1198                         sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1199         } else {
1200                 sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1201         }
1202 
1203         if (sz >= sizeof (startd_cline)) {
1204                 console(B_TRUE,
1205                     "svc.startd command line too long.  Ignoring.\n");
1206                 startd_cline[0] = '\0';
1207                 return;
1208         }
1209 }
1210 
1211 /*
1212  * spawn_processes() scans inittab for entries which should be run at this
1213  * mode.  Processes which should be running but are not, are started.
1214  */
1215 static int
1216 spawn_processes()
1217 {
1218         struct PROC_TABLE               *pp;
1219         struct CMD_LINE                 cmd;
1220         char                            cmd_string[MAXCMDL];
1221         short                           lvl_mask;
1222         int                             status;
1223 
1224         /*
1225          * First check the "powerhit" flag.  If it is set, make sure the modes
1226          * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1227          * on the "powerhit" flag by disallowing a new powerfail interrupt
1228          * between the test of the powerhit flag and the clearing of it.
1229          */
1230         if (wakeup.w_flags.w_powerhit) {
1231                 wakeup.w_flags.w_powerhit = 0;
1232                 op_modes = PF_MODES;
1233         }
1234         lvl_mask = state_to_mask(cur_state);
1235 
1236         /*
1237          * Scan through all the entries in inittab.
1238          */
1239         while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1240                 if (id_eq(cmd.c_id, "smf")) {
1241                         process_startd_line(&cmd, cmd_string);
1242                         continue;
1243                 }
1244 
1245 retry_for_proc_slot:
1246 
1247                 /*
1248                  * Find out if there is a process slot for this entry already.
1249                  */
1250                 if ((pp = findpslot(&cmd)) == NULLPROC) {
1251                         /*
1252                          * we've run out of proc table entries
1253                          * increase proc_table.
1254                          */
1255                         increase_proc_table_size();
1256 
1257                         /*
1258                          * Retry now as we have an empty proc slot.
1259                          * In case increase_proc_table_size() fails,
1260                          * we will keep retrying.
1261                          */
1262                         goto retry_for_proc_slot;
1263                 }
1264 
1265                 /*
1266                  * If there is an entry, and it is marked as DEMANDREQUEST,
1267                  * one of the levels a, b, or c is in its levels mask, and
1268                  * the action field is ONDEMAND and ONDEMAND is a permissable
1269                  * mode, and the process is dead, then respawn it.
1270                  */
1271                 if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1272                     (cmd.c_levels & MASK_abc) &&
1273                     (cmd.c_action & op_modes) == M_ONDEMAND) {
1274                         spawn(pp, &cmd);
1275                         continue;
1276                 }
1277 
1278                 /*
1279                  * If the action is not an action we are interested in,
1280                  * skip the entry.
1281                  */
1282                 if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1283                     (cmd.c_levels & lvl_mask) == 0)
1284                         continue;
1285 
1286                 /*
1287                  * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1288                  * ONDEMAND) and the action field is either OFF or the action
1289                  * field is ONCE or WAIT and the current level is the same as
1290                  * the last level, then skip this entry.  ONCE and WAIT only
1291                  * get run when the level changes.
1292                  */
1293                 if (op_modes == NORMAL_MODES &&
1294                     (cmd.c_action == M_OFF ||
1295                     (cmd.c_action & (M_ONCE|M_WAIT)) &&
1296                     cur_state == prev_state))
1297                         continue;
1298 
1299                 /*
1300                  * At this point we are interested in performing the action for
1301                  * this entry.  Actions fall into two categories, spinning off
1302                  * a process and not waiting, and spinning off a process and
1303                  * waiting for it to die.  If the action is ONCE, RESPAWN,
1304                  * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1305                  * to die, for all other actions we do wait.
1306                  */
1307                 if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1308                         spawn(pp, &cmd);
1309 
1310                 } else {
1311                         spawn(pp, &cmd);
1312                         while (waitproc(pp) == FAILURE)
1313                                 ;
1314                         (void) account(DEAD_PROCESS, pp, NULL);
1315                         pp->p_flags = 0;
1316                 }
1317         }
1318         return (status);
1319 }
1320 
1321 /*
1322  * spawn() spawns a shell, inserts the information about the process
1323  * process into the proc_table, and does the startup accounting.
1324  */
1325 static void
1326 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1327 {
1328         int             i;
1329         int             modes, maxfiles;
1330         time_t          now;
1331         struct PROC_TABLE tmproc, *oprocess;
1332 
1333         /*
1334          * The modes to be sent to efork() are 0 unless we are
1335          * spawning a LVLa, LVLb, or LVLc entry or we will be
1336          * waiting for the death of the child before continuing.
1337          */
1338         modes = NAMED;
1339         if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1340             cur_state == LVLb || cur_state == LVLc)
1341                 modes |= DEMANDREQUEST;
1342         if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1343                 modes |= NOCLEANUP;
1344 
1345         /*
1346          * If this is a respawnable process, check the threshold
1347          * information to avoid excessive respawns.
1348          */
1349         if (cmd->c_action & M_RESPAWN) {
1350                 /*
1351                  * Add NOCLEANUP to all respawnable commands so that the
1352                  * information about the frequency of respawns isn't lost.
1353                  */
1354                 modes |= NOCLEANUP;
1355                 (void) time(&now);
1356 
1357                 /*
1358                  * If no time is assigned, then this is the first time
1359                  * this command is being processed in this series.  Assign
1360                  * the current time.
1361                  */
1362                 if (process->p_time == 0L)
1363                         process->p_time = now;
1364 
1365                 if (process->p_count++ == SPAWN_LIMIT) {
1366 
1367                         if ((now - process->p_time) < SPAWN_INTERVAL) {
1368                                 /*
1369                                  * Process is respawning too rapidly.  Print
1370                                  * message and refuse to respawn it for now.
1371                                  */
1372                                 console(B_TRUE, "Command is respawning too "
1373                                     "rapidly. Check for possible errors.\n"
1374                                     "id:%4s \"%s\"\n",
1375                                     &cmd->c_id[0], &cmd->c_command[EXEC]);
1376                                 return;
1377                         }
1378                         process->p_time = now;
1379                         process->p_count = 0;
1380 
1381                 } else if (process->p_count > SPAWN_LIMIT) {
1382                         /*
1383                          * If process has been respawning too rapidly and
1384                          * the inhibit time limit hasn't expired yet, we
1385                          * refuse to respawn.
1386                          */
1387                         if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1388                                 return;
1389                         process->p_time = now;
1390                         process->p_count = 0;
1391                 }
1392                 rsflag = TRUE;
1393         }
1394 
1395         /*
1396          * Spawn a child process to execute this command.
1397          */
1398         (void) sighold(SIGCLD);
1399         oprocess = process;
1400         while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1401                 (void) pause();
1402 
1403         if (process == NULLPROC) {
1404 
1405                 /*
1406                  * We are the child.  We must make sure we get a different
1407                  * file pointer for our references to utmpx.  Otherwise our
1408                  * seeks and reads will compete with those of the parent.
1409                  */
1410                 endutxent();
1411 
1412                 /*
1413                  * Perform the accounting for the beginning of a process.
1414                  * Note that all processes are initially "INIT_PROCESS"es.
1415                  */
1416                 tmproc.p_id[0] = cmd->c_id[0];
1417                 tmproc.p_id[1] = cmd->c_id[1];
1418                 tmproc.p_id[2] = cmd->c_id[2];
1419                 tmproc.p_id[3] = cmd->c_id[3];
1420                 tmproc.p_pid = getpid();
1421                 tmproc.p_exit = 0;
1422                 (void) account(INIT_PROCESS, &tmproc,
1423                     prog_name(&cmd->c_command[EXEC]));
1424                 maxfiles = ulimit(UL_GDESLIM, 0);
1425                 for (i = 0; i < maxfiles; i++)
1426                         (void) fcntl(i, F_SETFD, FD_CLOEXEC);
1427 
1428                 /*
1429                  * Now exec a shell with the -c option and the command
1430                  * from inittab.
1431                  */
1432                 (void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1433                     glob_envp);
1434                 console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1435                     "= %d (exec of shell failed)\n", cmd->c_command, errno);
1436 
1437                 /*
1438                  * Don't come back so quickly that "init" doesn't have a
1439                  * chance to finish putting this child in "proc_table".
1440                  */
1441                 timer(20);
1442                 exit(1);
1443 
1444         }
1445 
1446         /*
1447          * We are the parent.  Insert the necessary
1448          * information in the proc_table.
1449          */
1450         process->p_id[0] = cmd->c_id[0];
1451         process->p_id[1] = cmd->c_id[1];
1452         process->p_id[2] = cmd->c_id[2];
1453         process->p_id[3] = cmd->c_id[3];
1454 
1455         st_write();
1456 
1457         (void) sigrelse(SIGCLD);
1458 }
1459 
1460 /*
1461  * findpslot() finds the old slot in the process table for the
1462  * command with the same id, or it finds an empty slot.
1463  */
1464 static struct PROC_TABLE *
1465 findpslot(struct CMD_LINE *cmd)
1466 {
1467         struct PROC_TABLE       *process;
1468         struct PROC_TABLE       *empty = NULLPROC;
1469 
1470         for (process = proc_table;
1471             (process < proc_table + num_proc); process++) {
1472                 if (process->p_flags & OCCUPIED &&
1473                     id_eq(process->p_id, cmd->c_id))
1474                         break;
1475 
1476                 /*
1477                  * If the entry is totally empty and "empty" is still 0,
1478                  * remember where this hole is and make sure the slot is
1479                  * zeroed out.
1480                  */
1481                 if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1482                         empty = process;
1483                         process->p_id[0] = '\0';
1484                         process->p_id[1] = '\0';
1485                         process->p_id[2] = '\0';
1486                         process->p_id[3] = '\0';
1487                         process->p_pid = 0;
1488                         process->p_time = 0L;
1489                         process->p_count = 0;
1490                         process->p_flags = 0;
1491                         process->p_exit = 0;
1492                 }
1493         }
1494 
1495         /*
1496          * If there is no entry for this slot, then there should be an
1497          * empty slot.  If there is no empty slot, then we've run out
1498          * of proc_table space.  If the latter is true, empty will be
1499          * NULL and the caller will have to complain.
1500          */
1501         if (process == (proc_table + num_proc))
1502                 process = empty;
1503 
1504         return (process);
1505 }
1506 
1507 /*
1508  * getcmd() parses lines from inittab.  Each time it finds a command line
1509  * it will return TRUE as well as fill the passed CMD_LINE structure and
1510  * the shell command string.  When the end of inittab is reached, FALSE
1511  * is returned inittab is automatically opened if it is not currently open
1512  * and is closed when the end of the file is reached.
1513  */
1514 static FILE *fp_inittab = NULL;
1515 
1516 static int
1517 getcmd(struct CMD_LINE *cmd, char *shcmd)
1518 {
1519         char    *ptr;
1520         int     c, lastc, state;
1521         char    *ptr1;
1522         int     answer, i, proceed;
1523         struct  stat    sbuf;
1524         static char *actions[] = {
1525                 "off", "respawn", "ondemand", "once", "wait", "boot",
1526                 "bootwait", "powerfail", "powerwait", "initdefault",
1527                 "sysinit",
1528         };
1529         static short act_masks[] = {
1530                 M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1531                 M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1532         };
1533         /*
1534          * Only these actions will be allowed for entries which
1535          * are specified for single-user mode.
1536          */
1537         short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1538 
1539         if (fp_inittab == NULL) {
1540                 /*
1541                  * Before attempting to open inittab we stat it to make
1542                  * sure it currently exists and is not empty.  We try
1543                  * several times because someone may have temporarily
1544                  * unlinked or truncated the file.
1545                  */
1546                 for (i = 0; i < 3; i++) {
1547                         if (stat(INITTAB, &sbuf) == -1) {
1548                                 if (i == 2) {
1549                                         console(B_TRUE,
1550                                             "Cannot stat %s, errno: %d\n",
1551                                             INITTAB, errno);
1552                                         return (FAILURE);
1553                                 } else {
1554                                         timer(3);
1555                                 }
1556                         } else if (sbuf.st_size < 10) {
1557                                 if (i == 2) {
1558                                         console(B_TRUE,
1559                                             "%s truncated or corrupted\n",
1560                                             INITTAB);
1561                                         return (FAILURE);
1562                                 } else {
1563                                         timer(3);
1564                                 }
1565                         } else {
1566                                 break;
1567                         }
1568                 }
1569 
1570                 /*
1571                  * If unable to open inittab, print error message and
1572                  * return FAILURE to caller.
1573                  */
1574                 if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1575                         console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1576                             errno);
1577                         return (FAILURE);
1578                 }
1579         }
1580 
1581         /*
1582          * Keep getting commands from inittab until you find a
1583          * good one or run out of file.
1584          */
1585         for (answer = FALSE; answer == FALSE; ) {
1586                 /*
1587                  * Zero out the cmd itself before trying next line.
1588                  */
1589                 bzero(cmd, sizeof (struct CMD_LINE));
1590 
1591                 /*
1592                  * Read in lines of inittab, parsing at colons, until a line is
1593                  * read in which doesn't end with a backslash.  Do not start if
1594                  * the first character read is an EOF.  Note that this means
1595                  * that lines which don't end in a newline are still processed,
1596                  * since the "for" will terminate normally once started,
1597                  * regardless of whether line terminates with a newline or EOF.
1598                  */
1599                 state = FAILURE;
1600                 if ((c = fgetc(fp_inittab)) == EOF) {
1601                         answer = FALSE;
1602                         (void) fclose(fp_inittab);
1603                         fp_inittab = NULL;
1604                         break;
1605                 }
1606 
1607                 for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1608                     proceed && c != EOF;
1609                     lastc = c, c = fgetc(fp_inittab)) {
1610                         /* If we're not in the FAILURE state and haven't */
1611                         /* yet reached the shell command field, process  */
1612                         /* the line, otherwise just look for a real end  */
1613                         /* of line.                                      */
1614                         if (state != FAILURE && state != COMMAND) {
1615                         /*
1616                          * Squeeze out spaces and tabs.
1617                          */
1618                         if (c == ' ' || c == '\t')
1619                                 continue;
1620 
1621                         /*
1622                          * Ignore characters in a comment, except for the \n.
1623                          */
1624                         if (state == COMMENT) {
1625                                 if (c == '\n') {
1626                                         lastc = ' ';
1627                                         break;
1628                                 } else {
1629                                         continue;
1630                                 }
1631                         }
1632 
1633                         /*
1634                          * Detect comments (lines whose first non-whitespace
1635                          * character is '#') by checking that we're at the
1636                          * beginning of a line, have seen a '#', and haven't
1637                          * yet accumulated any characters.
1638                          */
1639                         if (state == ID && c == '#' && ptr == shcmd) {
1640                                 state = COMMENT;
1641                                 continue;
1642                         }
1643 
1644                         /*
1645                          * If the character is a ':', then check the
1646                          * previous field for correctness and advance
1647                          * to the next field.
1648                          */
1649                         if (c == ':') {
1650                                 switch (state) {
1651 
1652                                 case ID :
1653                                 /*
1654                                  * Check to see that there are only
1655                                  * 1 to 4 characters for the id.
1656                                  */
1657                                 if ((i = ptr - shcmd) < 1 || i > 4) {
1658                                         state = FAILURE;
1659                                 } else {
1660                                         bcopy(shcmd, &cmd->c_id[0], i);
1661                                         ptr = shcmd;
1662                                         state = LEVELS;
1663                                 }
1664                                 break;
1665 
1666                                 case LEVELS :
1667                                 /*
1668                                  * Build a mask for all the levels for
1669                                  * which this command will be legal.
1670                                  */
1671                                 for (cmd->c_levels = 0, ptr1 = shcmd;
1672                                     ptr1 < ptr; ptr1++) {
1673                                         int mask;
1674                                         if (lvlname_to_mask(*ptr1,
1675                                             &mask) == -1) {
1676                                                 state = FAILURE;
1677                                                 break;
1678                                         }
1679                                         cmd->c_levels |= mask;
1680                                 }
1681                                 if (state != FAILURE) {
1682                                         state = ACTION;
1683                                         ptr = shcmd;    /* Reset the buffer */
1684                                 }
1685                                 break;
1686 
1687                                 case ACTION :
1688                                 /*
1689                                  * Null terminate the string in shcmd buffer and
1690                                  * then try to match against legal actions.  If
1691                                  * the field is of length 0, then the default of
1692                                  * "RESPAWN" is used if the id is numeric,
1693                                  * otherwise the default is "OFF".
1694                                  */
1695                                 if (ptr == shcmd) {
1696                                         if (isdigit(cmd->c_id[0]) &&
1697                                             (cmd->c_id[1] == '\0' ||
1698                                             isdigit(cmd->c_id[1])) &&
1699                                             (cmd->c_id[2] == '\0' ||
1700                                             isdigit(cmd->c_id[2])) &&
1701                                             (cmd->c_id[3] == '\0' ||
1702                                             isdigit(cmd->c_id[3])))
1703                                                 cmd->c_action = M_RESPAWN;
1704                                         else
1705                                                 cmd->c_action = M_OFF;
1706                                 } else {
1707                                         for (cmd->c_action = 0, i = 0,
1708                                             *ptr = '\0';
1709                                             i <
1710                                             sizeof (actions)/sizeof (char *);
1711                                             i++) {
1712                                         if (strcmp(shcmd, actions[i]) == 0) {
1713                                                 if ((cmd->c_levels & MASKSU) &&
1714                                                     !(act_masks[i] & su_acts))
1715                                                         cmd->c_action = 0;
1716                                                 else
1717                                                         cmd->c_action =
1718                                                             act_masks[i];
1719                                                 break;
1720                                         }
1721                                         }
1722                                 }
1723 
1724                                 /*
1725                                  * If the action didn't match any legal action,
1726                                  * set state to FAILURE.
1727                                  */
1728                                 if (cmd->c_action == 0) {
1729                                         state = FAILURE;
1730                                 } else {
1731                                         state = COMMAND;
1732                                         (void) strcpy(shcmd, "exec ");
1733                                 }
1734                                 ptr = shcmd + EXEC;
1735                                 break;
1736                                 }
1737                                 continue;
1738                         }
1739                 }
1740 
1741                 /* If the character is a '\n', then this is the end of a */
1742                 /* line.  If the '\n' wasn't preceded by a backslash, */
1743                 /* it is also the end of an inittab command.  If it was */
1744                 /* preceded by a backslash then the next line is a */
1745                 /* continuation.  Note that the continuation '\n' falls */
1746                 /* through and is treated like other characters and is */
1747                 /* stored in the shell command line. */
1748                 if (c == '\n' && lastc != '\\') {
1749                         proceed = FALSE;
1750                         *ptr = '\0';
1751                         break;
1752                 }
1753 
1754                 /* For all other characters just stuff them into the */
1755                 /* command as long as there aren't too many of them. */
1756                 /* Make sure there is room for a terminating '\0' also. */
1757                 if (ptr >= shcmd + MAXCMDL - 1)
1758                         state = FAILURE;
1759                 else
1760                         *ptr++ = (char)c;
1761 
1762                 /* If the character we just stored was a quoted */
1763                 /* backslash, then change "c" to '\0', so that this     */
1764                 /* backslash will not cause a subsequent '\n' to appear */
1765                 /* quoted.  In otherwords '\' '\' '\n' is the real end */
1766                 /* of a command, while '\' '\n' is a continuation. */
1767                 if (c == '\\' && lastc == '\\')
1768                         c = '\0';
1769                 }
1770 
1771                 /*
1772                  * Make sure all the fields are properly specified
1773                  * for a good command line.
1774                  */
1775                 if (state == COMMAND) {
1776                         answer = TRUE;
1777                         cmd->c_command = shcmd;
1778 
1779                         /*
1780                          * If no default level was supplied, insert
1781                          * all numerical levels.
1782                          */
1783                         if (cmd->c_levels == 0)
1784                                 cmd->c_levels = MASK_NUMERIC;
1785 
1786                         /*
1787                          * If no action has been supplied, declare this
1788                          * entry to be OFF.
1789                          */
1790                         if (cmd->c_action == 0)
1791                                 cmd->c_action = M_OFF;
1792 
1793                         /*
1794                          * If no shell command has been supplied, make sure
1795                          * there is a null string in the command field.
1796                          */
1797                         if (ptr == shcmd + EXEC)
1798                                 *shcmd = '\0';
1799                 } else
1800                         answer = FALSE;
1801 
1802                 /*
1803                  * If we have reached the end of inittab, then close it
1804                  * and quit trying to find a good command line.
1805                  */
1806                 if (c == EOF) {
1807                         (void) fclose(fp_inittab);
1808                         fp_inittab = NULL;
1809                         break;
1810                 }
1811         }
1812         return (answer);
1813 }
1814 
1815 /*
1816  * lvlname_to_state(): convert the character name of a state to its level
1817  * (its corresponding signal number).
1818  */
1819 static int
1820 lvlname_to_state(char name)
1821 {
1822         int i;
1823         for (i = 0; i < LVL_NELEMS; i++) {
1824                 if (lvls[i].lvl_name == name)
1825                         return (lvls[i].lvl_state);
1826         }
1827         return (-1);
1828 }
1829 
1830 /*
1831  * state_to_name(): convert the level to the character name.
1832  */
1833 static char
1834 state_to_name(int state)
1835 {
1836         int i;
1837         for (i = 0; i < LVL_NELEMS; i++) {
1838                 if (lvls[i].lvl_state == state)
1839                         return (lvls[i].lvl_name);
1840         }
1841         return (-1);
1842 }
1843 
1844 /*
1845  * state_to_mask(): return the mask corresponding to a signal number
1846  */
1847 static int
1848 state_to_mask(int state)
1849 {
1850         int i;
1851         for (i = 0; i < LVL_NELEMS; i++) {
1852                 if (lvls[i].lvl_state == state)
1853                         return (lvls[i].lvl_mask);
1854         }
1855         return (0);     /* return 0, since that represents an empty mask */
1856 }
1857 
1858 /*
1859  * lvlname_to_mask(): return the mask corresponding to a levels character name
1860  */
1861 static int
1862 lvlname_to_mask(char name, int *mask)
1863 {
1864         int i;
1865         for (i = 0; i < LVL_NELEMS; i++) {
1866                 if (lvls[i].lvl_name == name) {
1867                         *mask = lvls[i].lvl_mask;
1868                         return (0);
1869                 }
1870         }
1871         return (-1);
1872 }
1873 
1874 /*
1875  * state_to_flags(): return the flags corresponding to a runlevel.  These
1876  * indicate properties of that runlevel.
1877  */
1878 static int
1879 state_to_flags(int state)
1880 {
1881         int i;
1882         for (i = 0; i < LVL_NELEMS; i++) {
1883                 if (lvls[i].lvl_state == state)
1884                         return (lvls[i].lvl_flags);
1885         }
1886         return (0);
1887 }
1888 
1889 /*
1890  * killproc() creates a child which kills the process specified by pid.
1891  */
1892 void
1893 killproc(pid_t pid)
1894 {
1895         struct PROC_TABLE       *process;
1896 
1897         (void) sighold(SIGCLD);
1898         while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1899                 (void) pause();
1900         (void) sigrelse(SIGCLD);
1901 
1902         if (process == NULLPROC) {
1903                 /*
1904                  * efork() sets all signal handlers to the default, so reset
1905                  * the ALRM handler to make timer() work as expected.
1906                  */
1907                 (void) sigset(SIGALRM, alarmclk);
1908 
1909                 /*
1910                  * We are the child.  Try to terminate the process nicely
1911                  * first using SIGTERM and if it refuses to die in TWARN
1912                  * seconds kill it with SIGKILL.
1913                  */
1914                 (void) kill(pid, SIGTERM);
1915                 (void) timer(TWARN);
1916                 (void) kill(pid, SIGKILL);
1917                 (void) exit(0);
1918         }
1919 }
1920 
1921 /*
1922  * Set up the default environment for all procs to be forked from init.
1923  * Read the values from the /etc/default/init file, except for PATH.  If
1924  * there's not enough room in the environment array, the environment
1925  * lines that don't fit are silently discarded.
1926  */
1927 void
1928 init_env()
1929 {
1930         char    line[MAXCMDL];
1931         FILE    *fp;
1932         int     inquotes, length, wslength;
1933         char    *tokp, *cp1, *cp2;
1934 
1935         glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1936         (void) strcpy(glob_envp[0], DEF_PATH);
1937         glob_envn = 1;
1938 
1939         if (rflg) {
1940                 glob_envp[1] =
1941                     malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1942                 (void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1943                 ++glob_envn;
1944         } else if (bflg == 1) {
1945                 glob_envp[1] =
1946                     malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1947                 (void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1948                 ++glob_envn;
1949         }
1950 
1951         if ((fp = fopen(ENVFILE, "r")) == NULL) {
1952                 console(B_TRUE,
1953                     "Cannot open %s. Environment not initialized.\n",
1954                     ENVFILE);
1955         } else {
1956                 while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1957                     glob_envn < MAXENVENT - 2) {
1958                         /*
1959                          * Toss newline
1960                          */
1961                         length = strlen(line);
1962                         if (line[length - 1] == '\n')
1963                                 line[length - 1] = '\0';
1964 
1965                         /*
1966                          * Ignore blank or comment lines.
1967                          */
1968                         if (line[0] == '#' || line[0] == '\0' ||
1969                             (wslength = strspn(line, " \t\n")) ==
1970                             strlen(line) ||
1971                             strchr(line, '#') == line + wslength)
1972                                 continue;
1973 
1974                         /*
1975                          * First make a pass through the line and change
1976                          * any non-quoted semi-colons to blanks so they
1977                          * will be treated as token separators below.
1978                          */
1979                         inquotes = 0;
1980                         for (cp1 = line; *cp1 != '\0'; cp1++) {
1981                                 if (*cp1 == '"') {
1982                                         if (inquotes == 0)
1983                                                 inquotes = 1;
1984                                         else
1985                                                 inquotes = 0;
1986                                 } else if (*cp1 == ';') {
1987                                         if (inquotes == 0)
1988                                                 *cp1 = ' ';
1989                                 }
1990                         }
1991 
1992                         /*
1993                          * Tokens within the line are separated by blanks
1994                          *  and tabs.  For each token in the line which
1995                          * contains a '=' we strip out any quotes and then
1996                          * stick the token in the environment array.
1997                          */
1998                         if ((tokp = strtok(line, " \t")) == NULL)
1999                                 continue;
2000                         do {
2001                                 if (strchr(tokp, '=') == NULL)
2002                                         continue;
2003                                 length = strlen(tokp);
2004                                 while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2005                                         for (cp2 = cp1;
2006                                             cp2 < &tokp[length]; cp2++)
2007                                                 *cp2 = *(cp2 + 1);
2008                                         length--;
2009                                 }
2010 
2011                                 if (strncmp(tokp, "CMASK=",
2012                                     sizeof ("CMASK=") - 1) == 0) {
2013                                         long t;
2014 
2015                                         /* We know there's an = */
2016                                         t = strtol(strchr(tokp, '=') + 1, NULL,
2017                                             8);
2018 
2019                                         /* Sanity */
2020                                         if (t <= 077 && t >= 0)
2021                                                 cmask = (int)t;
2022                                         (void) umask(cmask);
2023                                         continue;
2024                                 }
2025                                 glob_envp[glob_envn] =
2026                                     malloc((unsigned)(length + 1));
2027                                 (void) strcpy(glob_envp[glob_envn], tokp);
2028                                 if (++glob_envn >= MAXENVENT - 1)
2029                                         break;
2030                         } while ((tokp = strtok(NULL, " \t")) != NULL);
2031                 }
2032 
2033                 /*
2034                  * Append a null pointer to the environment array
2035                  * to mark its end.
2036                  */
2037                 glob_envp[glob_envn] = NULL;
2038                 (void) fclose(fp);
2039         }
2040 }
2041 
2042 /*
2043  * boot_init(): Do initialization things that should be done at boot.
2044  */
2045 void
2046 boot_init()
2047 {
2048         int i;
2049         struct PROC_TABLE *process, *oprocess;
2050         struct CMD_LINE cmd;
2051         char    line[MAXCMDL];
2052         char    svc_aux[SVC_AUX_SIZE];
2053         char    init_svc_fmri[SVC_FMRI_SIZE];
2054         char *old_path;
2055         int maxfiles;
2056 
2057         /* Use INIT_PATH for sysinit cmds */
2058         old_path = glob_envp[0];
2059         glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2060         (void) strcpy(glob_envp[0], INIT_PATH);
2061 
2062         /*
2063          * Scan inittab(4) and process the special svc.startd entry, initdefault
2064          * and sysinit entries.
2065          */
2066         while (getcmd(&cmd, &line[0]) == TRUE) {
2067                 if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2068                         process_startd_line(&cmd, line);
2069                         (void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2070                             INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2071                 } else if (cmd.c_action == M_INITDEFAULT) {
2072                         /*
2073                          * initdefault is no longer meaningful, as the SMF
2074                          * milestone controls what (legacy) run level we
2075                          * boot to.
2076                          */
2077                         console(B_TRUE,
2078                             "Ignoring legacy \"initdefault\" entry.\n");
2079                 } else if (cmd.c_action == M_SYSINIT) {
2080                         /*
2081                          * Execute the "sysinit" entry and wait for it to
2082                          * complete.  No bookkeeping is performed on these
2083                          * entries because we avoid writing to the file system
2084                          * until after there has been an chance to check it.
2085                          */
2086                         if (process = findpslot(&cmd)) {
2087                                 (void) sighold(SIGCLD);
2088                                 (void) snprintf(svc_aux, SVC_AUX_SIZE,
2089                                     INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2090                                 (void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2091                                     SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2092                                     cmd.c_id);
2093                                 if (legacy_tmpl >= 0) {
2094                                         (void) ct_pr_tmpl_set_svc_fmri(
2095                                             legacy_tmpl, init_svc_fmri);
2096                                         (void) ct_pr_tmpl_set_svc_aux(
2097                                             legacy_tmpl, svc_aux);
2098                                 }
2099 
2100                                 for (oprocess = process;
2101                                     (process = efork(M_OFF, oprocess,
2102                                     (NAMED|NOCLEANUP))) == NO_ROOM;
2103                                     /* CSTYLED */)
2104                                         ;
2105                                 (void) sigrelse(SIGCLD);
2106 
2107                                 if (process == NULLPROC) {
2108                                         maxfiles = ulimit(UL_GDESLIM, 0);
2109 
2110                                         for (i = 0; i < maxfiles; i++)
2111                                                 (void) fcntl(i, F_SETFD,
2112                                                     FD_CLOEXEC);
2113                                         (void) execle(SH, "INITSH", "-c",
2114                                             cmd.c_command,
2115                                             (char *)0, glob_envp);
2116                                         console(B_TRUE,
2117 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2118                                             cmd.c_command, errno);
2119                                         exit(1);
2120                                 } else
2121                                         while (waitproc(process) == FAILURE)
2122                                                 ;
2123                                 process->p_flags = 0;
2124                                 st_write();
2125                         }
2126                 }
2127         }
2128 
2129         /* Restore the path. */
2130         free(glob_envp[0]);
2131         glob_envp[0] = old_path;
2132 
2133         /*
2134          * This will enable st_write() to complain about init_state_file.
2135          */
2136         booting = 0;
2137 
2138         /*
2139          * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2140          * out a correct version.
2141          */
2142         if (write_ioctl)
2143                 write_ioctl_syscon();
2144 
2145         /*
2146          * Start svc.startd(1M), which does most of the work.
2147          */
2148         if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2149                 /* Start svc.startd. */
2150                 if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2151                         cur_state = SINGLE_USER;
2152         } else {
2153                 console(B_TRUE, "Absent svc.startd entry or bad "
2154                     "contract template.  Not starting svc.startd.\n");
2155                 enter_maintenance();
2156         }
2157 }
2158 
2159 /*
2160  * init_signals(): Initialize all signals to either be caught or ignored.
2161  */
2162 void
2163 init_signals(void)
2164 {
2165         struct sigaction act;
2166         int i;
2167 
2168         /*
2169          * Start by ignoring all signals, then selectively re-enable some.
2170          * The SIG_IGN disposition will only affect asynchronous signals:
2171          * any signal that we trigger synchronously that doesn't end up
2172          * being handled by siglvl() will be forcibly delivered by the kernel.
2173          */
2174         for (i = SIGHUP; i <= SIGRTMAX; i++)
2175                 (void) sigset(i, SIG_IGN);
2176 
2177         /*
2178          * Handle all level-changing signals using siglvl() and set sa_mask so
2179          * that all level-changing signals are blocked while in siglvl().
2180          */
2181         act.sa_handler = siglvl;
2182         act.sa_flags = SA_SIGINFO;
2183         (void) sigemptyset(&act.sa_mask);
2184 
2185         (void) sigaddset(&act.sa_mask, LVLQ);
2186         (void) sigaddset(&act.sa_mask, LVL0);
2187         (void) sigaddset(&act.sa_mask, LVL1);
2188         (void) sigaddset(&act.sa_mask, LVL2);
2189         (void) sigaddset(&act.sa_mask, LVL3);
2190         (void) sigaddset(&act.sa_mask, LVL4);
2191         (void) sigaddset(&act.sa_mask, LVL5);
2192         (void) sigaddset(&act.sa_mask, LVL6);
2193         (void) sigaddset(&act.sa_mask, SINGLE_USER);
2194         (void) sigaddset(&act.sa_mask, LVLa);
2195         (void) sigaddset(&act.sa_mask, LVLb);
2196         (void) sigaddset(&act.sa_mask, LVLc);
2197 
2198         (void) sigaction(LVLQ, &act, NULL);
2199         (void) sigaction(LVL0, &act, NULL);
2200         (void) sigaction(LVL1, &act, NULL);
2201         (void) sigaction(LVL2, &act, NULL);
2202         (void) sigaction(LVL3, &act, NULL);
2203         (void) sigaction(LVL4, &act, NULL);
2204         (void) sigaction(LVL5, &act, NULL);
2205         (void) sigaction(LVL6, &act, NULL);
2206         (void) sigaction(SINGLE_USER, &act, NULL);
2207         (void) sigaction(LVLa, &act, NULL);
2208         (void) sigaction(LVLb, &act, NULL);
2209         (void) sigaction(LVLc, &act, NULL);
2210 
2211         (void) sigset(SIGALRM, alarmclk);
2212         alarmclk();
2213 
2214         (void) sigset(SIGCLD, childeath);
2215         (void) sigset(SIGPWR, powerfail);
2216 }
2217 
2218 /*
2219  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2220  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2221  * creation and open until after /var/run has been mounted.  This function is
2222  * only called on startup and when explicitly requested via LVLQ.
2223  */
2224 void
2225 setup_pipe()
2226 {
2227         struct stat stat_buf;
2228         struct statvfs statvfs_buf;
2229         struct sigaction act;
2230 
2231         /*
2232          * Always close the previous pipe descriptor as the mounted filesystems
2233          * may have changed.
2234          */
2235         if (Pfd >= 0)
2236                 (void) close(Pfd);
2237 
2238         if ((stat(INITPIPE, &stat_buf) == 0) &&
2239             ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2240                 Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2241         else
2242                 if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2243                     ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2244                         (void) unlink(INITPIPE);
2245                         (void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2246                         Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2247                 }
2248 
2249         if (Pfd >= 0) {
2250                 (void) ioctl(Pfd, I_SETSIG, S_INPUT);
2251                 /*
2252                  * Read pipe in message discard mode.
2253                  */
2254                 (void) ioctl(Pfd, I_SRDOPT, RMSGD);
2255 
2256                 act.sa_handler = sigpoll;
2257                 act.sa_flags = 0;
2258                 (void) sigemptyset(&act.sa_mask);
2259                 (void) sigaddset(&act.sa_mask, SIGCLD);
2260                 (void) sigaction(SIGPOLL, &act, NULL);
2261         }
2262 }
2263 
2264 /*
2265  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2266  * should change the current run level.  We set new_state accordingly.
2267  */
2268 void
2269 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2270 {
2271         struct PROC_TABLE *process;
2272         struct sigaction act;
2273 
2274         /*
2275          * If the signal was from the kernel (rather than init(1M)) then init
2276          * itself tripped the signal.  That is, we might have a bug and tripped
2277          * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2278          * such a case we reset the disposition to SIG_DFL, block all signals
2279          * in uc_mask but the current one, and return to the interrupted ucp
2280          * to effect an appropriate death.  The kernel will then restart us.
2281          *
2282          * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2283          * the kernel can send us when it wants to effect an orderly reboot.
2284          * For this case we must also verify si_code is zero, rather than a
2285          * code such as FPE_INTDIV which a bug might have triggered.
2286          */
2287         if (sip != NULL && SI_FROMKERNEL(sip) &&
2288             (sig != SIGFPE || sip->si_code == 0)) {
2289 
2290                 (void) sigemptyset(&act.sa_mask);
2291                 act.sa_handler = SIG_DFL;
2292                 act.sa_flags = 0;
2293                 (void) sigaction(sig, &act, NULL);
2294 
2295                 (void) sigfillset(&ucp->uc_sigmask);
2296                 (void) sigdelset(&ucp->uc_sigmask, sig);
2297                 ucp->uc_flags |= UC_SIGMASK;
2298 
2299                 (void) setcontext(ucp);
2300         }
2301 
2302         /*
2303          * If the signal received is a LVLQ signal, do not really
2304          * change levels, just restate the current level.  If the
2305          * signal is not a LVLQ, set the new level to the signal
2306          * received.
2307          */
2308         if (sig == LVLQ) {
2309                 new_state = cur_state;
2310                 lvlq_received = B_TRUE;
2311         } else {
2312                 new_state = sig;
2313         }
2314 
2315         /*
2316          * Clear all times and repeat counts in the process table
2317          * since either the level is changing or the user has editted
2318          * the inittab file and wants us to look at it again.
2319          * If the user has fixed a typo, we don't want residual timing
2320          * data preventing the fixed command line from executing.
2321          */
2322         for (process = proc_table;
2323             (process < proc_table + num_proc); process++) {
2324                 process->p_time = 0L;
2325                 process->p_count = 0;
2326         }
2327 
2328         /*
2329          * Set the flag to indicate that a "user signal" was received.
2330          */
2331         wakeup.w_flags.w_usersignal = 1;
2332 }
2333 
2334 
2335 /*
2336  * alarmclk
2337  */
2338 static void
2339 alarmclk()
2340 {
2341         time_up = TRUE;
2342 }
2343 
2344 /*
2345  * childeath_single():
2346  *
2347  * This used to be the SIGCLD handler and it was set with signal()
2348  * (as opposed to sigset()).  When a child exited we'd come to the
2349  * handler, wait for the child, and reenable the handler with
2350  * signal() just before returning.  The implementation of signal()
2351  * checks with waitid() for waitable children and sends a SIGCLD
2352  * if there are some.  If children are exiting faster than the
2353  * handler can run we keep sending signals and the handler never
2354  * gets to return and eventually the stack runs out and init dies.
2355  * To prevent that we set the handler with sigset() so the handler
2356  * doesn't need to be reset, and in childeath() (see below) we
2357  * call childeath_single() as long as there are children to be
2358  * waited for.  If a child exits while init is in the handler a
2359  * SIGCLD will be pending and delivered on return from the handler.
2360  * If the child was already waited for the handler will have nothing
2361  * to do and return, otherwise the child will be waited for.
2362  */
2363 static void
2364 childeath_single(pid_t pid, int status)
2365 {
2366         struct PROC_TABLE       *process;
2367         struct pidlist          *pp;
2368 
2369         /*
2370          * Scan the process table to see if we are interested in this process.
2371          */
2372         for (process = proc_table;
2373             (process < proc_table + num_proc); process++) {
2374                 if ((process->p_flags & (LIVING|OCCUPIED)) ==
2375                     (LIVING|OCCUPIED) && process->p_pid == pid) {
2376 
2377                         /*
2378                          * Mark this process as having died and store the exit
2379                          * status.  Also set the wakeup flag for a dead child
2380                          * and break out of the loop.
2381                          */
2382                         process->p_flags &= ~LIVING;
2383                         process->p_exit = (short)status;
2384                         wakeup.w_flags.w_childdeath = 1;
2385 
2386                         return;
2387                 }
2388         }
2389 
2390         /*
2391          * No process was found above, look through auxiliary list.
2392          */
2393         (void) sighold(SIGPOLL);
2394         pp = Plhead;
2395         while (pp) {
2396                 if (pid > pp->pl_pid) {
2397                         /*
2398                          * Keep on looking.
2399                          */
2400                         pp = pp->pl_next;
2401                         continue;
2402                 } else if (pid < pp->pl_pid) {
2403                         /*
2404                          * Not in the list.
2405                          */
2406                         break;
2407                 } else {
2408                         /*
2409                          * This is a dead "godchild".
2410                          */
2411                         pp->pl_dflag = 1;
2412                         pp->pl_exit = (short)status;
2413                         wakeup.w_flags.w_childdeath = 1;
2414                         Gchild = 1;     /* Notice to call cleanaux(). */
2415                         break;
2416                 }
2417         }
2418 
2419         (void) sigrelse(SIGPOLL);
2420 }
2421 
/*
 * childeath(): SIGCLD handler.  Reaps every child that is currently
 * waitable, without blocking, and hands each one to childeath_single().
 *
 * errno is saved on entry and restored on exit.  The loop always ends with
 * a waitpid() call that finds nothing (and may set errno, e.g. to ECHILD),
 * which would otherwise overwrite the errno of whatever call this handler
 * interrupted.
 */
/* ARGSUSED */
static void
childeath(int signo)
{
        int saved_errno = errno;
        pid_t pid;
        int status;

        while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
                childeath_single(pid, status);

        errno = saved_errno;
}
2432 
2433 static void
2434 powerfail()
2435 {
2436         (void) nice(-19);
2437         wakeup.w_flags.w_powerhit = 1;
2438 }
2439 
2440 /*
2441  * efork() forks a child and the parent inserts the process in its table
2442  * of processes that are directly a result of forks that it has performed.
2443  * The child just changes the "global" with the process id for this process
2444  * to it's new value.
2445  * If efork() is called with a pointer into the proc_table it uses that slot,
2446  * otherwise it searches for a free slot.  Regardless of how it was called,
2447  * it returns the pointer to the proc_table entry
2448  *
2449  * The SIGCLD signal is blocked (held) before calling efork()
2450  * and is unblocked (released) after efork() returns.
2451  *
2452  * Ideally, this should be rewritten to use modern signal semantics.
2453  */
2454 static struct PROC_TABLE *
2455 efork(int action, struct PROC_TABLE *process, int modes)
2456 {
2457         pid_t   childpid;
2458         struct PROC_TABLE *proc;
2459         int             i;
2460         /*
2461          * Freshen up the proc_table, removing any entries for dead processes
2462          * that don't have NOCLEANUP set.  Perform the necessary accounting.
2463          */
2464         for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2465                 if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2466                     (OCCUPIED)) {
2467                         /*
2468                          * Is this a named process?
2469                          * If so, do the necessary bookkeeping.
2470                          */
2471                         if (proc->p_flags & NAMED)
2472                                 (void) account(DEAD_PROCESS, proc, NULL);
2473 
2474                         /*
2475                          * Free this entry for new usage.
2476                          */
2477                         proc->p_flags = 0;
2478                 }
2479         }
2480 
2481         while ((childpid = fork()) == FAILURE) {
2482                 /*
2483                  * Shorten the alarm timer in case someone else's child dies
2484                  * and free up a slot in the process table.
2485                  */
2486                 setimer(5);
2487 
2488                 /*
2489                  * Wait for some children to die.  Since efork()
2490                  * is always called with SIGCLD blocked, unblock
2491                  * it here so that child death signals can come in.
2492                  */
2493                 (void) sigrelse(SIGCLD);
2494                 (void) pause();
2495                 (void) sighold(SIGCLD);
2496                 setimer(0);
2497         }
2498 
2499         if (childpid != 0) {
2500 
2501                 if (process == NULLPROC) {
2502                         /*
2503                          * No proc table pointer specified so search
2504                          * for a free slot.
2505                          */
2506                         for (process = proc_table;  process->p_flags != 0 &&
2507                             (process < proc_table + num_proc); process++)
2508                                         ;
2509 
2510                         if (process == (proc_table + num_proc)) {
2511                                 int old_proc_table_size = num_proc;
2512 
2513                                 /* Increase the process table size */
2514                                 increase_proc_table_size();
2515                                 if (old_proc_table_size == num_proc) {
2516                                         /* didn't grow: memory failure */
2517                                         return (NO_ROOM);
2518                                 } else {
2519                                         process =
2520                                             proc_table + old_proc_table_size;
2521                                 }
2522                         }
2523 
2524                         process->p_time = 0L;
2525                         process->p_count = 0;
2526                 }
2527                 process->p_id[0] = '\0';
2528                 process->p_id[1] = '\0';
2529                 process->p_id[2] = '\0';
2530                 process->p_id[3] = '\0';
2531                 process->p_pid = childpid;
2532                 process->p_flags = (LIVING | OCCUPIED | modes);
2533                 process->p_exit = 0;
2534 
2535                 st_write();
2536         } else {
2537                 if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2538                         (void) setpgrp();
2539 
2540                 process = NULLPROC;
2541 
2542                 /*
2543                  * Reset all signals to the system defaults.
2544                  */
2545                 for (i = SIGHUP; i <= SIGRTMAX; i++)
2546                         (void) sigset(i, SIG_DFL);
2547 
2548                 /*
2549                  * POSIX B.2.2.2 advises that init should set SIGTTOU,
2550                  * SIGTTIN, and SIGTSTP to SIG_IGN.
2551                  *
2552                  * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2553                  * for backward compatibility.
2554                  */
2555                 (void) sigset(SIGTTIN, SIG_IGN);
2556                 (void) sigset(SIGTTOU, SIG_IGN);
2557                 (void) sigset(SIGTSTP, SIG_IGN);
2558                 (void) sigset(SIGXCPU, SIG_IGN);
2559                 (void) sigset(SIGXFSZ, SIG_IGN);
2560         }
2561         return (process);
2562 }
2563 
2564 
2565 /*
2566  * waitproc() waits for a specified process to die.  For this function to
2567  * work, the specified process must already in the proc_table.  waitproc()
2568  * returns the exit status of the specified process when it dies.
2569  */
2570 static long
2571 waitproc(struct PROC_TABLE *process)
2572 {
2573         int             answer;
2574         sigset_t        oldmask, newmask, zeromask;
2575 
2576         (void) sigemptyset(&zeromask);
2577         (void) sigemptyset(&newmask);
2578 
2579         (void) sigaddset(&newmask, SIGCLD);
2580 
2581         /* Block SIGCLD and save the current signal mask */
2582         if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2583                 perror("SIG_BLOCK error");
2584 
2585         /*
2586          * Wait around until the process dies.
2587          */
2588         if (process->p_flags & LIVING)
2589                 (void) sigsuspend(&zeromask);
2590 
2591         /* Reset signal mask to unblock SIGCLD */
2592         if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2593                 perror("SIG_SETMASK error");
2594 
2595         if (process->p_flags & LIVING)
2596                 return (FAILURE);
2597 
2598         /*
2599          * Make sure to only return 16 bits so that answer will always
2600          * be positive whenever the process of interest really died.
2601          */
2602         answer = (process->p_exit & 0xffff);
2603 
2604         /*
2605          * Free the slot in the proc_table.
2606          */
2607         process->p_flags = 0;
2608         return (answer);
2609 }
2610 
2611 /*
2612  * notify_pam_dead(): calls into the PAM framework to close the given session.
2613  */
2614 static void
2615 notify_pam_dead(struct utmpx *up)
2616 {
2617         pam_handle_t *pamh;
2618         char user[sizeof (up->ut_user) + 1];
2619         char ttyn[sizeof (up->ut_line) + 1];
2620         char host[sizeof (up->ut_host) + 1];
2621 
2622         /*
2623          * PAM does not take care of updating utmpx/wtmpx.
2624          */
2625         (void) snprintf(user, sizeof (user), "%s", up->ut_user);
2626         (void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2627         (void) snprintf(host, sizeof (host), "%s", up->ut_host);
2628 
2629         if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2630                 (void) pam_set_item(pamh, PAM_TTY, ttyn);
2631                 (void) pam_set_item(pamh, PAM_RHOST, host);
2632                 (void) pam_close_session(pamh, 0);
2633                 (void) pam_end(pamh, PAM_SUCCESS);
2634         }
2635 }
2636 
2637 /*
2638  * Check you can access utmpx (As / may be read-only and
2639  * /var may not be mounted yet).
2640  */
2641 static int
2642 access_utmpx(void)
2643 {
2644         do {
2645                 utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2646         } while (!utmpx_ok && errno == EINTR);
2647 
2648         return (utmpx_ok);
2649 }
2650 
2651 /*
2652  * account() updates entries in utmpx and appends new entries to the end of
2653  * wtmpx (assuming they exist).  The program argument indicates the name of
2654  * program if INIT_PROCESS, otherwise should be NULL.
2655  *
2656  * account() only blocks for INIT_PROCESS requests.
2657  *
2658  * Returns non-zero if write failed.
2659  */
2660 static int
2661 account(short state, struct PROC_TABLE *process, char *program)
2662 {
2663         struct utmpx utmpbuf, *u, *oldu;
2664         int tmplen;
2665         char fail_buf[UT_LINE_SZ];
2666         sigset_t block, unblock;
2667 
2668         if (!utmpx_ok && !access_utmpx()) {
2669                 return (-1);
2670         }
2671 
2672         /*
2673          * Set up the prototype for the utmp structure we want to write.
2674          */
2675         u = &utmpbuf;
2676         (void) memset(u, 0, sizeof (struct utmpx));
2677 
2678         /*
2679          * Fill in the various fields of the utmp structure.
2680          */
2681         u->ut_id[0] = process->p_id[0];
2682         u->ut_id[1] = process->p_id[1];
2683         u->ut_id[2] = process->p_id[2];
2684         u->ut_id[3] = process->p_id[3];
2685         u->ut_pid = process->p_pid;
2686 
2687         /*
2688          * Fill the "ut_exit" structure.
2689          */
2690         u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2691         u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2692         u->ut_type = state;
2693 
2694         (void) time(&u->ut_tv.tv_sec);
2695 
2696         /*
2697          * Block signals for utmp update.
2698          */
2699         (void) sigfillset(&block);
2700         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
2701 
2702         /*
2703          * See if there already is such an entry in the "utmpx" file.
2704          */
2705         setutxent();    /* Start at beginning of utmpx file. */
2706 
2707         if ((oldu = getutxid(u)) != NULL) {
2708                 /*
2709                  * Copy in the old "user", "line" and "host" fields
2710                  * to our new structure.
2711                  */
2712                 bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2713                 bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2714                 bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2715                 u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2716                     min(tmplen + 1, sizeof (u->ut_host)) : 0;
2717 
2718                 if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2719                         notify_pam_dead(oldu);
2720                 }
2721         }
2722 
2723         /*
2724          * Perform special accounting. Insert the special string into the
2725          * ut_line array. For INIT_PROCESSes put in the name of the
2726          * program in the "ut_user" field.
2727          */
2728         switch (state) {
2729         case INIT_PROCESS:
2730                 (void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2731                 (void) strcpy(fail_buf, "INIT_PROCESS");
2732                 break;
2733 
2734         default:
2735                 (void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2736                 break;
2737         }
2738 
2739         /*
2740          * Write out the updated entry to utmpx file.
2741          */
2742         if (pututxline(u) == NULL) {
2743                 console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2744                     fail_buf, strerror(errno));
2745                 endutxent();
2746                 (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2747                 return (-1);
2748         }
2749 
2750         /*
2751          * If we're able to write to utmpx, then attempt to add to the
2752          * end of the wtmpx file.
2753          */
2754         updwtmpx(WTMPX, u);
2755 
2756         endutxent();
2757 
2758         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2759 
2760         return (0);
2761 }
2762 
2763 static void
2764 clearent(pid_t pid, short status)
2765 {
2766         struct utmpx *up;
2767         sigset_t block, unblock;
2768 
2769         /*
2770          * Block signals for utmp update.
2771          */
2772         (void) sigfillset(&block);
2773         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
2774 
2775         /*
2776          * No error checking for now.
2777          */
2778 
2779         setutxent();
2780         while (up = getutxent()) {
2781                 if (up->ut_pid == pid) {
2782                         if (up->ut_type == DEAD_PROCESS) {
2783                                 /*
2784                                  * Cleaned up elsewhere.
2785                                  */
2786                                 continue;
2787                         }
2788 
2789                         notify_pam_dead(up);
2790 
2791                         up->ut_type = DEAD_PROCESS;
2792                         up->ut_exit.e_termination = WTERMSIG(status);
2793                         up->ut_exit.e_exit = WEXITSTATUS(status);
2794                         (void) time(&up->ut_tv.tv_sec);
2795 
2796                         (void) pututxline(up);
2797                         /*
2798                          * Now attempt to add to the end of the
2799                          * wtmp and wtmpx files.  Do not create
2800                          * if they don't already exist.
2801                          */
2802                         updwtmpx(WTMPX, up);
2803 
2804                         break;
2805                 }
2806         }
2807 
2808         endutxent();
2809         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2810 }
2811 
2812 /*
2813  * prog_name() searches for the word or unix path name and
2814  * returns a pointer to the last element of the pathname.
2815  */
2816 static char *
2817 prog_name(char *string)
2818 {
2819         char    *ptr, *ptr2;
2820         static char word[UT_USER_SZ + 1];
2821 
2822         /*
2823          * Search for the first word skipping leading spaces and tabs.
2824          */
2825         while (*string == ' ' || *string == '\t')
2826                 string++;
2827 
2828         /*
2829          * If the first non-space non-tab character is not one allowed in
2830          * a word, return a pointer to a null string, otherwise parse the
2831          * pathname.
2832          */
2833         if (*string != '.' && *string != '/' && *string != '_' &&
2834             (*string < 'a' || *string > 'z') &&
2835             (*string < 'A' || * string > 'Z') &&
2836             (*string < '0' || *string > '9'))
2837                 return ("");
2838 
2839         /*
2840          * Parse the pathname looking forward for '/', ' ', '\t', '\n' or
2841          * '\0'.  Each time a '/' is found, move "ptr" to one past the
2842          * '/', thus when a ' ', '\t', '\n', or '\0' is found, "ptr" will
2843          * point to the last element of the pathname.
2844          */
2845         for (ptr = string; *string != ' ' && *string != '\t' &&
2846             *string != '\n' && *string != '\0'; string++) {
2847                 if (*string == '/')
2848                         ptr = string+1;
2849         }
2850 
2851         /*
2852          * Copy out up to the size of the "ut_user" array into "word",
2853          * null terminate it and return a pointer to it.
2854          */
2855         for (ptr2 = &word[0]; ptr2 < &word[UT_USER_SZ] &&
2856             ptr < string; /* CSTYLED */)
2857                 *ptr2++ = *ptr++;
2858 
2859         *ptr2 = '\0';
2860         return (&word[0]);
2861 }
2862 
2863 
2864 /*
2865  * realcon() returns a nonzero value if there is a character device
2866  * associated with SYSCON that has the same device number as CONSOLE.
2867  */
2868 static int
2869 realcon()
2870 {
2871         struct stat sconbuf, conbuf;
2872 
2873         if (stat(SYSCON, &sconbuf) != -1 &&
2874             stat(CONSOLE, &conbuf) != -1 &&
2875             S_ISCHR(sconbuf.st_mode) &&
2876             S_ISCHR(conbuf.st_mode) &&
2877             sconbuf.st_rdev == conbuf.st_rdev) {
2878                 return (1);
2879         } else {
2880                 return (0);
2881         }
2882 }
2883 
2884 
2885 /*
2886  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2887  * Returns true if the IOCTLSYSCON file needs to be written (with
2888  * write_ioctl_syscon() below)
2889  */
2890 static int
2891 get_ioctl_syscon()
2892 {
2893         FILE    *fp;
2894         unsigned int    iflags, oflags, cflags, lflags, ldisc, cc[18];
2895         int             i, valid_format = 0;
2896 
2897         /*
2898          * Read in the previous modes for SYSCON from IOCTLSYSCON.
2899          */
2900         if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2901                 stored_syscon_termios = dflt_termios;
2902                 console(B_TRUE,
2903                     "warning:%s does not exist, default settings assumed\n",
2904                     IOCTLSYSCON);
2905         } else {
2906 
2907                 i = fscanf(fp,
2908             "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2909                     &iflags, &oflags, &cflags, &lflags,
2910                     &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2911                     &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2912                     &cc[14], &cc[15], &cc[16], &cc[17]);
2913 
2914                 if (i == 22) {
2915                         stored_syscon_termios.c_iflag = iflags;
2916                         stored_syscon_termios.c_oflag = oflags;
2917                         stored_syscon_termios.c_cflag = cflags;
2918                         stored_syscon_termios.c_lflag = lflags;
2919                         for (i = 0; i < 18; i++)
2920                                 stored_syscon_termios.c_cc[i] = (char)cc[i];
2921                         valid_format = 1;
2922                 } else if (i == 13) {
2923                 rewind(fp);
2924                 i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2925                     &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2926                     &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2927 
2928                 /*
2929                  * If the file is formatted properly, use the values to
2930                  * initialize the console terminal condition.
2931                  */
2932                 stored_syscon_termios.c_iflag = (ushort_t)iflags;
2933                 stored_syscon_termios.c_oflag = (ushort_t)oflags;
2934                 stored_syscon_termios.c_cflag = (ushort_t)cflags;
2935                 stored_syscon_termios.c_lflag = (ushort_t)lflags;
2936                 for (i = 0; i < 8; i++)
2937                         stored_syscon_termios.c_cc[i] = (char)cc[i];
2938                 valid_format = 1;
2939                 }
2940                 (void) fclose(fp);
2941 
2942                 /* If the file is badly formatted, use the default settings. */
2943                 if (!valid_format)
2944                         stored_syscon_termios = dflt_termios;
2945         }
2946 
2947         /* If the file had a bad format, rewrite it later. */
2948         return (!valid_format);
2949 }
2950 
2951 
2952 static void
2953 write_ioctl_syscon()
2954 {
2955         FILE *fp;
2956         int i;
2957 
2958         (void) unlink(SYSCON);
2959         (void) link(SYSTTY, SYSCON);
2960         (void) umask(022);
2961         fp = fopen(IOCTLSYSCON, "w");
2962 
2963         (void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2964             stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2965             stored_syscon_termios.c_lflag);
2966         for (i = 0; i < 8; ++i)
2967                 (void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2968         (void) putc('\n', fp);
2969 
2970         (void) fflush(fp);
2971         (void) fsync(fileno(fp));
2972         (void) fclose(fp);
2973         (void) umask(cmask);
2974 }
2975 
2976 
2977 /*
2978  * void console(boolean_t, char *, ...)
2979  *   Outputs the requested message to the system console.  Note that the number
2980  *   of arguments passed to console() should be determined by the print format.
2981  *
2982  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2983  *   message.
2984  *
2985  *   To make sure we write to the console in a sane fashion, we use the modes
2986  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2987  *   Afterwards we restore whatever modes were already there.
2988  */
2989 /* PRINTFLIKE2 */
2990 static void
2991 console(boolean_t prefix, char *format, ...)
2992 {
2993         char    outbuf[BUFSIZ];
2994         va_list args;
2995         int fd, getret;
2996         struct termios old_syscon_termios;
2997         FILE *f;
2998 
2999         /*
3000          * We open SYSCON anew each time in case it has changed (see
3001          * userinit()).
3002          */
3003         if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3004             (f = fdopen(fd, "r+")) == NULL) {
3005                 if (prefix)
3006                         syslog(LOG_WARNING, "INIT: ");
3007                 va_start(args, format);
3008                 vsyslog(LOG_WARNING, format, args);
3009                 va_end(args);
3010                 if (fd >= 0)
3011                         (void) close(fd);
3012                 return;
3013         }
3014         setbuf(f, &outbuf[0]);
3015 
3016         getret = tcgetattr(fd, &old_syscon_termios);
3017         old_syscon_termios.c_cflag &= ~HUPCL;
3018         if (realcon())
3019                 /* Don't overwrite cflag of real console. */
3020                 stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3021 
3022         stored_syscon_termios.c_cflag &= ~HUPCL;
3023 
3024         (void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3025 
3026         if (prefix)
3027                 (void) fprintf(f, "\nINIT: ");
3028         va_start(args, format);
3029         (void) vfprintf(f, format, args);
3030         va_end(args);
3031 
3032         if (getret == 0)
3033                 (void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3034 
3035         (void) fclose(f);
3036 }
3037 
3038 /*
3039  * timer() is a substitute for sleep() which uses alarm() and pause().
3040  */
3041 static void
3042 timer(int waitime)
3043 {
3044         setimer(waitime);
3045         while (time_up == FALSE)
3046                 (void) pause();
3047 }
3048 
3049 static void
3050 setimer(int timelimit)
3051 {
3052         alarmclk();
3053         (void) alarm(timelimit);
3054         time_up = (timelimit ? FALSE : TRUE);
3055 }
3056 
3057 /*
3058  * Fails with
3059  *   ENOMEM - out of memory
3060  *   ECONNABORTED - repository connection broken
3061  *   EPERM - permission denied
3062  *   EACCES - backend access denied
3063  *   EROFS - backend readonly
3064  */
3065 static int
3066 get_or_add_startd(scf_instance_t *inst)
3067 {
3068         scf_handle_t *h;
3069         scf_scope_t *scope = NULL;
3070         scf_service_t *svc = NULL;
3071         int ret = 0;
3072 
3073         h = scf_instance_handle(inst);
3074 
3075         if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3076             NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3077                 return (0);
3078 
3079         switch (scf_error()) {
3080         case SCF_ERROR_CONNECTION_BROKEN:
3081                 return (ECONNABORTED);
3082 
3083         case SCF_ERROR_NOT_FOUND:
3084                 break;
3085 
3086         case SCF_ERROR_HANDLE_MISMATCH:
3087         case SCF_ERROR_INVALID_ARGUMENT:
3088         case SCF_ERROR_CONSTRAINT_VIOLATED:
3089         default:
3090                 bad_error("scf_handle_decode_fmri", scf_error());
3091         }
3092 
3093         /* Make sure we're right, since we're adding piece-by-piece. */
3094         assert(strcmp(SCF_SERVICE_STARTD,
3095             "svc:/system/svc/restarter:default") == 0);
3096 
3097         if ((scope = scf_scope_create(h)) == NULL ||
3098             (svc = scf_service_create(h)) == NULL) {
3099                 ret = ENOMEM;
3100                 goto out;
3101         }
3102 
3103 get_scope:
3104         if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3105                 switch (scf_error()) {
3106                 case SCF_ERROR_CONNECTION_BROKEN:
3107                         ret = ECONNABORTED;
3108                         goto out;
3109 
3110                 case SCF_ERROR_NOT_FOUND:
3111                         (void) fputs(gettext(
3112                             "smf(5) repository missing local scope.\n"),
3113                             stderr);
3114                         exit(1);
3115                         /* NOTREACHED */
3116 
3117                 case SCF_ERROR_HANDLE_MISMATCH:
3118                 case SCF_ERROR_INVALID_ARGUMENT:
3119                 default:
3120                         bad_error("scf_handle_get_scope", scf_error());
3121                 }
3122         }
3123 
3124 get_svc:
3125         if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3126                 switch (scf_error()) {
3127                 case SCF_ERROR_CONNECTION_BROKEN:
3128                         ret = ECONNABORTED;
3129                         goto out;
3130 
3131                 case SCF_ERROR_DELETED:
3132                         goto get_scope;
3133 
3134                 case SCF_ERROR_NOT_FOUND:
3135                         break;
3136 
3137                 case SCF_ERROR_HANDLE_MISMATCH:
3138                 case SCF_ERROR_INVALID_ARGUMENT:
3139                 case SCF_ERROR_NOT_SET:
3140                 default:
3141                         bad_error("scf_scope_get_service", scf_error());
3142                 }
3143 
3144 add_svc:
3145                 if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3146                     0) {
3147                         switch (scf_error()) {
3148                         case SCF_ERROR_CONNECTION_BROKEN:
3149                                 ret = ECONNABORTED;
3150                                 goto out;
3151 
3152                         case SCF_ERROR_EXISTS:
3153                                 goto get_svc;
3154 
3155                         case SCF_ERROR_PERMISSION_DENIED:
3156                                 ret = EPERM;
3157                                 goto out;
3158 
3159                         case SCF_ERROR_BACKEND_ACCESS:
3160                                 ret = EACCES;
3161                                 goto out;
3162 
3163                         case SCF_ERROR_BACKEND_READONLY:
3164                                 ret = EROFS;
3165                                 goto out;
3166 
3167                         case SCF_ERROR_HANDLE_MISMATCH:
3168                         case SCF_ERROR_INVALID_ARGUMENT:
3169                         case SCF_ERROR_NOT_SET:
3170                         default:
3171                                 bad_error("scf_scope_add_service", scf_error());
3172                         }
3173                 }
3174         }
3175 
3176 get_inst:
3177         if (scf_service_get_instance(svc, "default", inst) != 0) {
3178                 switch (scf_error()) {
3179                 case SCF_ERROR_CONNECTION_BROKEN:
3180                         ret = ECONNABORTED;
3181                         goto out;
3182 
3183                 case SCF_ERROR_DELETED:
3184                         goto add_svc;
3185 
3186                 case SCF_ERROR_NOT_FOUND:
3187                         break;
3188 
3189                 case SCF_ERROR_HANDLE_MISMATCH:
3190                 case SCF_ERROR_INVALID_ARGUMENT:
3191                 case SCF_ERROR_NOT_SET:
3192                 default:
3193                         bad_error("scf_service_get_instance", scf_error());
3194                 }
3195 
3196                 if (scf_service_add_instance(svc, "default", inst) !=
3197                     0) {
3198                         switch (scf_error()) {
3199                         case SCF_ERROR_CONNECTION_BROKEN:
3200                                 ret = ECONNABORTED;
3201                                 goto out;
3202 
3203                         case SCF_ERROR_DELETED:
3204                                 goto add_svc;
3205 
3206                         case SCF_ERROR_EXISTS:
3207                                 goto get_inst;
3208 
3209                         case SCF_ERROR_PERMISSION_DENIED:
3210                                 ret = EPERM;
3211                                 goto out;
3212 
3213                         case SCF_ERROR_BACKEND_ACCESS:
3214                                 ret = EACCES;
3215                                 goto out;
3216 
3217                         case SCF_ERROR_BACKEND_READONLY:
3218                                 ret = EROFS;
3219                                 goto out;
3220 
3221                         case SCF_ERROR_HANDLE_MISMATCH:
3222                         case SCF_ERROR_INVALID_ARGUMENT:
3223                         case SCF_ERROR_NOT_SET:
3224                         default:
3225                                 bad_error("scf_service_add_instance",
3226                                     scf_error());
3227                         }
3228                 }
3229         }
3230 
3231         ret = 0;
3232 
3233 out:
3234         scf_service_destroy(svc);
3235         scf_scope_destroy(scope);
3236         return (ret);
3237 }
3238 
3239 /*
3240  * Fails with
3241  *   ECONNABORTED - repository connection broken
3242  *   ECANCELED - the transaction's property group was deleted
3243  */
3244 static int
3245 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3246     const char *pname, scf_type_t type)
3247 {
3248 change_type:
3249         if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3250                 return (0);
3251 
3252         switch (scf_error()) {
3253         case SCF_ERROR_CONNECTION_BROKEN:
3254                 return (ECONNABORTED);
3255 
3256         case SCF_ERROR_DELETED:
3257                 return (ECANCELED);
3258 
3259         case SCF_ERROR_NOT_FOUND:
3260                 goto new;
3261 
3262         case SCF_ERROR_HANDLE_MISMATCH:
3263         case SCF_ERROR_INVALID_ARGUMENT:
3264         case SCF_ERROR_NOT_BOUND:
3265         case SCF_ERROR_NOT_SET:
3266         default:
3267                 bad_error("scf_transaction_property_change_type", scf_error());
3268         }
3269 
3270 new:
3271         if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3272                 return (0);
3273 
3274         switch (scf_error()) {
3275         case SCF_ERROR_CONNECTION_BROKEN:
3276                 return (ECONNABORTED);
3277 
3278         case SCF_ERROR_DELETED:
3279                 return (ECANCELED);
3280 
3281         case SCF_ERROR_EXISTS:
3282                 goto change_type;
3283 
3284         case SCF_ERROR_HANDLE_MISMATCH:
3285         case SCF_ERROR_INVALID_ARGUMENT:
3286         case SCF_ERROR_NOT_BOUND:
3287         case SCF_ERROR_NOT_SET:
3288         default:
3289                 bad_error("scf_transaction_property_new", scf_error());
3290                 /* NOTREACHED */
3291         }
3292 }
3293 
3294 static void
3295 scferr(void)
3296 {
3297         switch (scf_error()) {
3298         case SCF_ERROR_NO_MEMORY:
3299                 console(B_TRUE, gettext("Out of memory.\n"));
3300                 break;
3301 
3302         case SCF_ERROR_CONNECTION_BROKEN:
3303                 console(B_TRUE, gettext(
3304                     "Connection to smf(5) repository server broken.\n"));
3305                 break;
3306 
3307         case SCF_ERROR_NO_RESOURCES:
3308                 console(B_TRUE, gettext(
3309                     "smf(5) repository server is out of memory.\n"));
3310                 break;
3311 
3312         case SCF_ERROR_PERMISSION_DENIED:
3313                 console(B_TRUE, gettext("Insufficient privileges.\n"));
3314                 break;
3315 
3316         default:
3317                 console(B_TRUE, gettext("libscf error: %s\n"),
3318                     scf_strerror(scf_error()));
3319         }
3320 }
3321 
3322 static void
3323 lscf_set_runlevel(char rl)
3324 {
3325         scf_handle_t *h;
3326         scf_instance_t *inst = NULL;
3327         scf_propertygroup_t *pg = NULL;
3328         scf_transaction_t *tx = NULL;
3329         scf_transaction_entry_t *ent = NULL;
3330         scf_value_t *val = NULL;
3331         char buf[2];
3332         int r;
3333 
3334         h = scf_handle_create(SCF_VERSION);
3335         if (h == NULL) {
3336                 scferr();
3337                 return;
3338         }
3339 
3340         if (scf_handle_bind(h) != 0) {
3341                 switch (scf_error()) {
3342                 case SCF_ERROR_NO_SERVER:
3343                         console(B_TRUE,
3344                             gettext("smf(5) repository server not running.\n"));
3345                         goto bail;
3346 
3347                 default:
3348                         scferr();
3349                         goto bail;
3350                 }
3351         }
3352 
3353         if ((inst = scf_instance_create(h)) == NULL ||
3354             (pg = scf_pg_create(h)) == NULL ||
3355             (val = scf_value_create(h)) == NULL ||
3356             (tx = scf_transaction_create(h)) == NULL ||
3357             (ent = scf_entry_create(h)) == NULL) {
3358                 scferr();
3359                 goto bail;
3360         }
3361 
3362 get_inst:
3363         r = get_or_add_startd(inst);
3364         switch (r) {
3365         case 0:
3366                 break;
3367 
3368         case ENOMEM:
3369         case ECONNABORTED:
3370         case EPERM:
3371         case EACCES:
3372         case EROFS:
3373                 scferr();
3374                 goto bail;
3375         default:
3376                 bad_error("get_or_add_startd", r);
3377         }
3378 
3379 get_pg:
3380         if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3381                 switch (scf_error()) {
3382                 case SCF_ERROR_CONNECTION_BROKEN:
3383                         scferr();
3384                         goto bail;
3385 
3386                 case SCF_ERROR_DELETED:
3387                         goto get_inst;
3388 
3389                 case SCF_ERROR_NOT_FOUND:
3390                         break;
3391 
3392                 case SCF_ERROR_HANDLE_MISMATCH:
3393                 case SCF_ERROR_INVALID_ARGUMENT:
3394                 case SCF_ERROR_NOT_SET:
3395                 default:
3396                         bad_error("scf_instance_get_pg", scf_error());
3397                 }
3398 
3399 add_pg:
3400                 if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3401                     SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3402                     0) {
3403                         switch (scf_error()) {
3404                         case SCF_ERROR_CONNECTION_BROKEN:
3405                         case SCF_ERROR_PERMISSION_DENIED:
3406                         case SCF_ERROR_BACKEND_ACCESS:
3407                                 scferr();
3408                                 goto bail;
3409 
3410                         case SCF_ERROR_DELETED:
3411                                 goto get_inst;
3412 
3413                         case SCF_ERROR_EXISTS:
3414                                 goto get_pg;
3415 
3416                         case SCF_ERROR_HANDLE_MISMATCH:
3417                         case SCF_ERROR_INVALID_ARGUMENT:
3418                         case SCF_ERROR_NOT_SET:
3419                         default:
3420                                 bad_error("scf_instance_add_pg", scf_error());
3421                         }
3422                 }
3423         }
3424 
3425         buf[0] = rl;
3426         buf[1] = '\0';
3427         r = scf_value_set_astring(val, buf);
3428         assert(r == 0);
3429 
3430         for (;;) {
3431                 if (scf_transaction_start(tx, pg) != 0) {
3432                         switch (scf_error()) {
3433                         case SCF_ERROR_CONNECTION_BROKEN:
3434                         case SCF_ERROR_PERMISSION_DENIED:
3435                         case SCF_ERROR_BACKEND_ACCESS:
3436                                 scferr();
3437                                 goto bail;
3438 
3439                         case SCF_ERROR_DELETED:
3440                                 goto add_pg;
3441 
3442                         case SCF_ERROR_HANDLE_MISMATCH:
3443                         case SCF_ERROR_NOT_BOUND:
3444                         case SCF_ERROR_IN_USE:
3445                         case SCF_ERROR_NOT_SET:
3446                         default:
3447                                 bad_error("scf_transaction_start", scf_error());
3448                         }
3449                 }
3450 
3451                 r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3452                 switch (r) {
3453                 case 0:
3454                         break;
3455 
3456                 case ECONNABORTED:
3457                         scferr();
3458                         goto bail;
3459 
3460                 case ECANCELED:
3461                         scf_transaction_reset(tx);
3462                         goto add_pg;
3463 
3464                 default:
3465                         bad_error("transaction_add_set", r);
3466                 }
3467 
3468                 r = scf_entry_add_value(ent, val);
3469                 assert(r == 0);
3470 
3471                 r = scf_transaction_commit(tx);
3472                 if (r == 1)
3473                         break;
3474 
3475                 if (r != 0) {
3476                         switch (scf_error()) {
3477                         case SCF_ERROR_CONNECTION_BROKEN:
3478                         case SCF_ERROR_PERMISSION_DENIED:
3479                         case SCF_ERROR_BACKEND_ACCESS:
3480                         case SCF_ERROR_BACKEND_READONLY:
3481                                 scferr();
3482                                 goto bail;
3483 
3484                         case SCF_ERROR_DELETED:
3485                                 scf_transaction_reset(tx);
3486                                 goto add_pg;
3487 
3488                         case SCF_ERROR_INVALID_ARGUMENT:
3489                         case SCF_ERROR_NOT_BOUND:
3490                         case SCF_ERROR_NOT_SET:
3491                         default:
3492                                 bad_error("scf_transaction_commit",
3493                                     scf_error());
3494                         }
3495                 }
3496 
3497                 scf_transaction_reset(tx);
3498                 (void) scf_pg_update(pg);
3499         }
3500 
3501 bail:
3502         scf_transaction_destroy(tx);
3503         scf_entry_destroy(ent);
3504         scf_value_destroy(val);
3505         scf_pg_destroy(pg);
3506         scf_instance_destroy(inst);
3507 
3508         (void) scf_handle_unbind(h);
3509         scf_handle_destroy(h);
3510 }
3511 
3512 /*
3513  * Function to handle requests from users to main init running as process 1.
3514  */
3515 static void
3516 userinit(int argc, char **argv)
3517 {
3518         FILE    *fp;
3519         char    *ln;
3520         int     init_signal;
3521         struct stat     sconbuf, conbuf;
3522         const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3523 
3524         /*
3525          * We are a user invoked init.  Is there an argument and is it
3526          * a single character?  If not, print usage message and quit.
3527          */
3528         if (argc != 2 || argv[1][1] != '\0') {
3529                 (void) fprintf(stderr, usage_msg);
3530                 exit(0);
3531         }
3532 
3533         if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3534                 (void) fprintf(stderr, usage_msg);
3535                 (void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3536                     argv[1]);
3537                 exit(1);
3538         }
3539 
3540         if (init_signal == SINGLE_USER) {
3541                 /*
3542                  * Make sure this process is talking to a legal tty line
3543                  * and that /dev/syscon is linked to this line.
3544                  */
3545                 ln = ttyname(0);        /* Get the name of tty */
3546                 if (ln == NULL) {
3547                         (void) fprintf(stderr,
3548                             "Standard input not a tty line\n");
3549                         (void) audit_put_record(ADT_FAILURE,
3550                             ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3551                         exit(1);
3552                 }
3553 
3554                 if ((stat(ln, &sconbuf) != -1) &&
3555                     (stat(SYSCON, &conbuf) == -1 ||
3556                     sconbuf.st_rdev != conbuf.st_rdev)) {
3557                         /*
3558                          * /dev/syscon needs to change.
3559                          * Unlink /dev/syscon and relink it to the current line.
3560                          */
3561                         if (lstat(SYSCON, &conbuf) != -1 &&
3562                             unlink(SYSCON) == FAILURE) {
3563                                 perror("Can't unlink /dev/syscon");
3564                                 (void) fprintf(stderr,
3565                                     "Run command on the system console.\n");
3566                                 (void) audit_put_record(ADT_FAILURE,
3567                                     ADT_FAIL_VALUE_PROGRAM, argv[1]);
3568                                 exit(1);
3569                         }
3570                         if (symlink(ln, SYSCON) == FAILURE) {
3571                                 (void) fprintf(stderr,
3572                                     "Can't symlink /dev/syscon to %s: %s", ln,
3573                                     strerror(errno));
3574 
3575                                 /* Try to leave a syscon */
3576                                 (void) link(SYSTTY, SYSCON);
3577                                 (void) audit_put_record(ADT_FAILURE,
3578                                     ADT_FAIL_VALUE_PROGRAM, argv[1]);
3579                                 exit(1);
3580                         }
3581 
3582                         /*
3583                          * Try to leave a message on system console saying where
3584                          * /dev/syscon is currently connected.
3585                          */
3586                         if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3587                                 (void) fprintf(fp,
3588                                     "\n****     SYSCON CHANGED TO %s    ****\n",
3589                                     ln);
3590                                 (void) fclose(fp);
3591                         }
3592                 }
3593         }
3594 
3595         update_boot_archive(init_signal);
3596 
3597         (void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3598 
3599         /*
3600          * Signal init; init will take care of telling svc.startd.
3601          */
3602         if (kill(init_pid, init_signal) == FAILURE) {
3603                 (void) fprintf(stderr, "Must be super-user\n");
3604                 (void) audit_put_record(ADT_FAILURE,
3605                     ADT_FAIL_VALUE_AUTH, argv[1]);
3606                 exit(1);
3607         }
3608 
3609         exit(0);
3610 }
3611 
3612 
3613 #define DELTA   25      /* Number of pidlist elements to allocate at a time */
3614 
3615 /* ARGSUSED */
3616 void
3617 sigpoll(int n)
3618 {
3619         struct pidrec prec;
3620         struct pidrec *p = &prec;
3621         struct pidlist *plp;
3622         struct pidlist *tp, *savetp;
3623         int i;
3624 
3625         if (Pfd < 0) {
3626                 return;
3627         }
3628 
3629         for (;;) {
3630                 /*
3631                  * Important Note: Either read will really fail (in which case
3632                  * return is all we can do) or will get EAGAIN (Pfd was opened
3633                  * O_NDELAY), in which case we also want to return.
3634                  * Always return from here!
3635                  */
3636                 if (read(Pfd, p, sizeof (struct pidrec)) !=
3637                                                 sizeof (struct pidrec)) {
3638                         return;
3639                 }
3640                 switch (p->pd_type) {
3641 
3642                 case ADDPID:
3643                         /*
3644                          * New "godchild", add to list.
3645                          */
3646                         if (Plfree == NULL) {
3647                                 plp = (struct pidlist *)calloc(DELTA,
3648                                     sizeof (struct pidlist));
3649                                 if (plp == NULL) {
3650                                         /* Can't save pid */
3651                                         break;
3652                                 }
3653                                 /*
3654                                  * Point at 2nd record allocated, we'll use plp.
3655                                  */
3656                                 tp = plp + 1;
3657                                 /*
3658                                  * Link them into a chain.
3659                                  */
3660                                 Plfree = tp;
3661                                 for (i = 0; i < DELTA - 2; i++) {
3662                                         tp->pl_next = tp + 1;
3663                                         tp++;
3664                                 }
3665                         } else {
3666                                 plp = Plfree;
3667                                 Plfree = plp->pl_next;
3668                         }
3669                         plp->pl_pid = p->pd_pid;
3670                         plp->pl_dflag = 0;
3671                         plp->pl_next = NULL;
3672                         /*
3673                          * Note - pid list is kept in increasing order of pids.
3674                          */
3675                         if (Plhead == NULL) {
3676                                 Plhead = plp;
3677                                 /* Back up to read next record */
3678                                 break;
3679                         } else {
3680                                 savetp = tp = Plhead;
3681                                 while (tp) {
3682                                         if (plp->pl_pid > tp->pl_pid) {
3683                                                 savetp = tp;
3684                                                 tp = tp->pl_next;
3685                                                 continue;
3686                                         } else if (plp->pl_pid < tp->pl_pid) {
3687                                                 if (tp == Plhead) {
3688                                                         plp->pl_next = Plhead;
3689                                                         Plhead = plp;
3690                                                 } else {
3691                                                         plp->pl_next =
3692                                                             savetp->pl_next;
3693                                                         savetp->pl_next = plp;
3694                                                 }
3695                                                 break;
3696                                         } else {
3697                                                 /* Already in list! */
3698                                                 plp->pl_next = Plfree;
3699                                                 Plfree = plp;
3700                                                 break;
3701                                         }
3702                                 }
3703                                 if (tp == NULL) {
3704                                         /* Add to end of list */
3705                                         savetp->pl_next = plp;
3706                                 }
3707                         }
3708                         /* Back up to read next record. */
3709                         break;
3710 
3711                 case REMPID:
3712                         /*
3713                          * This one was handled by someone else,
3714                          * purge it from the list.
3715                          */
3716                         if (Plhead == NULL) {
3717                                 /* Back up to read next record. */
3718                                 break;
3719                         }
3720                         savetp = tp = Plhead;
3721                         while (tp) {
3722                                 if (p->pd_pid > tp->pl_pid) {
3723                                         /* Keep on looking. */
3724                                         savetp = tp;
3725                                         tp = tp->pl_next;
3726                                         continue;
3727                                 } else if (p->pd_pid < tp->pl_pid) {
3728                                         /* Not in list. */
3729                                         break;
3730                                 } else {
3731                                         /* Found it. */
3732                                         if (tp == Plhead)
3733                                                 Plhead = tp->pl_next;
3734                                         else
3735                                                 savetp->pl_next = tp->pl_next;
3736                                         tp->pl_next = Plfree;
3737                                         Plfree = tp;
3738                                         break;
3739                                 }
3740                         }
3741                         /* Back up to read next record. */
3742                         break;
3743                 default:
3744                         console(B_TRUE, "Bad message on initpipe\n");
3745                         break;
3746                 }
3747         }
3748 }
3749 
3750 
3751 static void
3752 cleanaux()
3753 {
3754         struct pidlist *savep, *p;
3755         pid_t   pid;
3756         short   status;
3757 
3758         (void) sighold(SIGCLD);
3759         Gchild = 0;     /* Note - Safe to do this here since no SIGCLDs */
3760         (void) sighold(SIGPOLL);
3761         savep = p = Plhead;
3762         while (p) {
3763                 if (p->pl_dflag) {
3764                         /*
3765                          * Found an entry to delete,
3766                          * remove it from list first.
3767                          */
3768                         pid = p->pl_pid;
3769                         status = p->pl_exit;
3770                         if (p == Plhead) {
3771                                 Plhead = p->pl_next;
3772                                 p->pl_next = Plfree;
3773                                 Plfree = p;
3774                                 savep = p = Plhead;
3775                         } else {
3776                                 savep->pl_next = p->pl_next;
3777                                 p->pl_next = Plfree;
3778                                 Plfree = p;
3779                                 p = savep->pl_next;
3780                         }
3781                         clearent(pid, status);
3782                         continue;
3783                 }
3784                 savep = p;
3785                 p = p->pl_next;
3786         }
3787         (void) sigrelse(SIGPOLL);
3788         (void) sigrelse(SIGCLD);
3789 }
3790 
3791 
3792 /*
3793  * /etc/inittab has more entries and we have run out of room in the proc_table
3794  * array. Double the size of proc_table to accomodate the extra entries.
3795  */
3796 static void
3797 increase_proc_table_size()
3798 {
3799         sigset_t block, unblock;
3800         void *ptr;
3801         size_t delta = num_proc * sizeof (struct PROC_TABLE);
3802 
3803 
3804         /*
3805          * Block signals for realloc.
3806          */
3807         (void) sigfillset(&block);
3808         (void) sigprocmask(SIG_BLOCK, &block, &unblock);
3809 
3810 
3811         /*
3812          * On failure we just return because callers of this function check
3813          * for failure.
3814          */
3815         do
3816                 ptr = realloc(g_state, g_state_sz + delta);
3817         while (ptr == NULL && errno == EAGAIN)
3818                 ;
3819 
3820         if (ptr != NULL) {
3821                 /* ensure that the new part is initialized to zero */
3822                 bzero((caddr_t)ptr + g_state_sz, delta);
3823 
3824                 g_state = ptr;
3825                 g_state_sz += delta;
3826                 num_proc <<= 1;
3827         }
3828 
3829 
3830         /* unblock our signals before returning */
3831         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3832 }
3833 
3834 
3835 
3836 /*
3837  * Sanity check g_state.
3838  */
3839 static int
3840 st_sane()
3841 {
3842         int i;
3843         struct PROC_TABLE *ptp;
3844 
3845 
3846         /* Note: cur_state is encoded as a signal number */
3847         if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3848                 return (0);
3849 
3850         /* Check num_proc */
3851         if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3852             sizeof (struct PROC_TABLE))
3853                 return (0);
3854 
3855         /* Check proc_table */
3856         for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3857                 /* skip unoccupied entries */
3858                 if (!(ptp->p_flags & OCCUPIED))
3859                         continue;
3860 
3861                 /* p_flags has no bits outside of PF_MASK */
3862                 if (ptp->p_flags & ~(PF_MASK))
3863                         return (0);
3864 
3865                 /* 5 <= pid <= MAXPID */
3866                 if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3867                         return (0);
3868 
3869                 /* p_count >= 0 */
3870                 if (ptp->p_count < 0)
3871                         return (0);
3872 
3873                 /* p_time >= 0 */
3874                 if (ptp->p_time < 0)
3875                         return (0);
3876         }
3877 
3878         return (1);
3879 }
3880 
3881 /*
3882  * Initialize our state.
3883  *
3884  * If the system just booted, then init_state_file, which is located on an
3885  * everpresent tmpfs filesystem, should not exist.
3886  *
3887  * If we were restarted, then init_state_file should exist, in
3888  * which case we'll read it in, sanity check it, and use it.
3889  *
3890  * Note: You can't call console() until proc_table is ready.
3891  */
3892 void
3893 st_init()
3894 {
3895         struct stat stb;
3896         int ret, st_fd, insane = 0;
3897         size_t to_be_read;
3898         char *ptr;
3899 
3900 
3901         booting = 1;
3902 
3903         do {
3904                 /*
3905                  * If we can exclusively create the file, then we're the
3906                  * initial invocation of init(1M).
3907                  */
3908                 st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3909                     S_IRUSR | S_IWUSR);
3910         } while (st_fd == -1 && errno == EINTR);
3911         if (st_fd != -1)
3912                 goto new_state;
3913 
3914         booting = 0;
3915 
3916         do {
3917                 st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3918         } while (st_fd == -1 && errno == EINTR);
3919         if (st_fd == -1)
3920                 goto new_state;
3921 
3922         /* Get the size of the file. */
3923         do
3924                 ret = fstat(st_fd, &stb);
3925         while (ret == -1 && errno == EINTR)
3926                 ;
3927         if (ret == -1)
3928                 goto new_state;
3929 
3930         do
3931                 g_state = malloc(stb.st_size);
3932         while (g_state == NULL && errno == EAGAIN)
3933                 ;
3934         if (g_state == NULL)
3935                 goto new_state;
3936 
3937         to_be_read = stb.st_size;
3938         ptr = (char *)g_state;
3939         while (to_be_read > 0) {
3940                 ssize_t read_ret;
3941 
3942                 read_ret = read(st_fd, ptr, to_be_read);
3943                 if (read_ret < 0) {
3944                         if (errno == EINTR)
3945                                 continue;
3946 
3947                         goto new_state;
3948                 }
3949 
3950                 to_be_read -= read_ret;
3951                 ptr += read_ret;
3952         }
3953 
3954         (void) close(st_fd);
3955 
3956         g_state_sz = stb.st_size;
3957 
3958         if (st_sane()) {
3959                 console(B_TRUE, "Restarting.\n");
3960                 return;
3961         }
3962 
3963         insane = 1;
3964 
3965 new_state:
3966         if (st_fd >= 0)
3967                 (void) close(st_fd);
3968         else
3969                 (void) unlink(init_state_file);
3970 
3971         if (g_state != NULL)
3972                 free(g_state);
3973 
3974         /* Something went wrong, so allocate new state. */
3975         g_state_sz = sizeof (struct init_state) +
3976             ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3977         do
3978                 g_state = calloc(1, g_state_sz);
3979         while (g_state == NULL && errno == EAGAIN)
3980                 ;
3981         if (g_state == NULL) {
3982                 /* Fatal error! */
3983                 exit(errno);
3984         }
3985 
3986         g_state->ist_runlevel = -1;
3987         num_proc = init_num_proc;
3988 
3989         if (!booting) {
3990                 console(B_TRUE, "Restarting.\n");
3991 
3992                 /* Overwrite the bad state file. */
3993                 st_write();
3994 
3995                 if (!insane) {
3996                         console(B_TRUE,
3997                             "Error accessing persistent state file `%s'.  "
3998                             "Ignored.\n", init_state_file);
3999                 } else {
4000                         console(B_TRUE,
4001                             "Persistent state file `%s' is invalid and was "
4002                             "ignored.\n", init_state_file);
4003                 }
4004         }
4005 }
4006 
4007 /*
4008  * Write g_state out to the state file.
4009  */
4010 void
4011 st_write()
4012 {
4013         static int complained = 0;
4014 
4015         int st_fd;
4016         char *cp;
4017         size_t sz;
4018         ssize_t ret;
4019 
4020 
4021         do {
4022                 st_fd = open(init_next_state_file,
4023                     O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4024         } while (st_fd < 0 && errno == EINTR);
4025         if (st_fd < 0)
4026                 goto err;
4027 
4028         cp = (char *)g_state;
4029         sz = g_state_sz;
4030         while (sz > 0) {
4031                 ret = write(st_fd, cp, sz);
4032                 if (ret < 0) {
4033                         if (errno == EINTR)
4034                                 continue;
4035 
4036                         goto err;
4037                 }
4038 
4039                 sz -= ret;
4040                 cp += ret;
4041         }
4042 
4043         (void) close(st_fd);
4044         st_fd = -1;
4045         if (rename(init_next_state_file, init_state_file)) {
4046                 (void) unlink(init_next_state_file);
4047                 goto err;
4048         }
4049         complained = 0;
4050 
4051         return;
4052 
4053 err:
4054         if (st_fd >= 0)
4055                 (void) close(st_fd);
4056 
4057         if (!booting && !complained) {
4058                 /*
4059                  * Only complain after the filesystem should have come up.
4060                  * And only do it once so we don't loop between console()
4061                  * & efork().
4062                  */
4063                 complained = 1;
4064                 if (st_fd)
4065                         console(B_TRUE, "Couldn't write persistent state "
4066                             "file `%s'.\n", init_state_file);
4067                 else
4068                         console(B_TRUE, "Couldn't move persistent state "
4069                             "file `%s' to `%s'.\n", init_next_state_file,
4070                             init_state_file);
4071         }
4072 }
4073 
4074 /*
4075  * Create a contract with these parameters.
4076  */
4077 static int
4078 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4079     uint64_t cookie)
4080 {
4081         int fd, err;
4082 
4083         char *ioctl_tset_emsg =
4084             "Couldn't set \"%s\" contract template parameter: %s.\n";
4085 
4086         do
4087                 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4088         while (fd < 0 && errno == EINTR)
4089                 ;
4090         if (fd < 0) {
4091                 console(B_TRUE, "Couldn't create process template: %s.\n",
4092                     strerror(errno));
4093                 return (-1);
4094         }
4095 
4096         if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4097                 console(B_TRUE, "Contract set template inherit, regent "
4098                     "failed: %s.\n", strerror(err));
4099 
4100         /*
4101          * These errors result in a misconfigured template, which is better
4102          * than no template at all, so warn but don't abort.
4103          */
4104         if (err = ct_tmpl_set_informative(fd, info))
4105                 console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4106 
4107         if (err = ct_tmpl_set_critical(fd, critical))
4108                 console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4109 
4110         if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4111                 console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4112 
4113         if (err = ct_tmpl_set_cookie(fd, cookie))
4114                 console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4115 
4116         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4117 
4118         return (fd);
4119 }
4120 
4121 /*
4122  * Create the templates and open an event file descriptor.  We use dup2(2) to
4123  * get these descriptors away from the stdin/stdout/stderr group.
4124  */
4125 static void
4126 contracts_init()
4127 {
4128         int err, fd;
4129 
4130         /*
4131          * Create & configure a legacy template.  We only want empty events so
4132          * we know when to abandon them.
4133          */
4134         legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4135             ORDINARY_COOKIE);
4136         if (legacy_tmpl >= 0) {
4137                 err = ct_tmpl_activate(legacy_tmpl);
4138                 if (err != 0) {
4139                         (void) close(legacy_tmpl);
4140                         legacy_tmpl = -1;
4141                         console(B_TRUE,
4142                             "Couldn't activate legacy template (%s); "
4143                             "legacy services will be in init's contract.\n",
4144                             strerror(err));
4145                 }
4146         } else
4147                 console(B_TRUE,
4148                     "Legacy services will be in init's contract.\n");
4149 
4150         if (dup2(legacy_tmpl, 255) == -1) {
4151                 console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4152                     strerror(errno));
4153         } else {
4154                 (void) close(legacy_tmpl);
4155                 legacy_tmpl = 255;
4156         }
4157 
4158         (void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4159 
4160         startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4161             CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4162 
4163         if (dup2(startd_tmpl, 254) == -1) {
4164                 console(B_TRUE, "Could not duplicate startd template: %s.\n",
4165                     strerror(errno));
4166         } else {
4167                 (void) close(startd_tmpl);
4168                 startd_tmpl = 254;
4169         }
4170 
4171         (void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4172 
4173         if (legacy_tmpl < 0 && startd_tmpl < 0) {
4174                 /* The creation errors have already been reported. */
4175                 console(B_TRUE,
4176                     "Ignoring contract events.  Core smf(5) services will not "
4177                     "be restarted.\n");
4178                 return;
4179         }
4180 
4181         /*
4182          * Open an event endpoint.
4183          */
4184         do
4185                 fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4186         while (fd < 0 && errno == EINTR)
4187                 ;
4188         if (fd < 0) {
4189                 console(B_TRUE,
4190                     "Couldn't open process pbundle: %s.  Core smf(5) services "
4191                     "will not be restarted.\n", strerror(errno));
4192                 return;
4193         }
4194 
4195         if (dup2(fd, 253) == -1) {
4196                 console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4197                     strerror(errno));
4198         } else {
4199                 (void) close(fd);
4200                 fd = 253;
4201         }
4202 
4203         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4204 
4205         /* Reset in case we've been restarted. */
4206         (void) ct_event_reset(fd);
4207 
4208         poll_fds[0].fd = fd;
4209         poll_fds[0].events = POLLIN;
4210         poll_nfds = 1;
4211 }
4212 
4213 static int
4214 contract_getfile(ctid_t id, const char *name, int oflag)
4215 {
4216         int fd;
4217 
4218         do
4219                 fd = contract_open(id, "process", name, oflag);
4220         while (fd < 0 && errno == EINTR)
4221                 ;
4222 
4223         if (fd < 0)
4224                 console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4225                     name, id, strerror(errno));
4226 
4227         return (fd);
4228 }
4229 
4230 static int
4231 contract_cookie(ctid_t id, uint64_t *cp)
4232 {
4233         int fd, err;
4234         ct_stathdl_t sh;
4235 
4236         fd = contract_getfile(id, "status", O_RDONLY);
4237         if (fd < 0)
4238                 return (-1);
4239 
4240         err = ct_status_read(fd, CTD_COMMON, &sh);
4241         if (err != 0) {
4242                 console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4243                     id, strerror(err));
4244                 (void) close(fd);
4245                 return (-1);
4246         }
4247 
4248         (void) close(fd);
4249 
4250         *cp = ct_status_get_cookie(sh);
4251 
4252         ct_status_free(sh);
4253         return (0);
4254 }
4255 
4256 static void
4257 contract_ack(ct_evthdl_t e)
4258 {
4259         int fd;
4260 
4261         if (ct_event_get_flags(e) & CTE_INFO)
4262                 return;
4263 
4264         fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4265         if (fd < 0)
4266                 return;
4267 
4268         (void) ct_ctl_ack(fd, ct_event_get_evid(e));
4269         (void) close(fd);
4270 }
4271 
4272 /*
4273  * Process a contract event.
4274  */
4275 static void
4276 contract_event(struct pollfd *poll)
4277 {
4278         ct_evthdl_t e;
4279         int err;
4280         ctid_t ctid;
4281 
4282         if (!(poll->revents & POLLIN)) {
4283                 if (poll->revents & POLLERR)
4284                         console(B_TRUE,
4285                             "Unknown poll error on my process contract "
4286                             "pbundle.\n");
4287                 return;
4288         }
4289 
4290         err = ct_event_read(poll->fd, &e);
4291         if (err != 0) {
4292                 console(B_TRUE, "Error retrieving contract event: %s.\n",
4293                     strerror(err));
4294                 return;
4295         }
4296 
4297         ctid = ct_event_get_ctid(e);
4298 
4299         if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4300                 uint64_t cookie;
4301                 int ret, abandon = 1;
4302 
4303                 /* If it's svc.startd, restart it.  Else, abandon. */
4304                 ret = contract_cookie(ctid, &cookie);
4305 
4306                 if (ret == 0) {
4307                         if (cookie == STARTD_COOKIE &&
4308                             do_restart_startd) {
4309                                 if (smf_debug)
4310                                         console(B_TRUE, "Restarting "
4311                                             "svc.startd.\n");
4312 
4313                                 /*
4314                                  * Account for the failure.  If the failure rate
4315                                  * exceeds a threshold, then drop to maintenance
4316                                  * mode.
4317                                  */
4318                                 startd_record_failure();
4319                                 if (startd_failure_rate_critical())
4320                                         enter_maintenance();
4321 
4322                                 if (startd_tmpl < 0)
4323                                         console(B_TRUE,
4324                                             "Restarting svc.startd in "
4325                                             "improper contract (bad "
4326                                             "template).\n");
4327 
4328                                 (void) startd_run(startd_cline, startd_tmpl,
4329                                     ctid);
4330 
4331                                 abandon = 0;
4332                         }
4333                 }
4334 
4335                 if (abandon && (err = contract_abandon_id(ctid))) {
4336                         console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4337                             ctid, strerror(err));
4338                 }
4339 
4340                 /*
4341                  * No need to acknowledge the event since either way the
4342                  * originating contract should be abandoned.
4343                  */
4344         } else {
4345                 console(B_TRUE,
4346                     "Received contract event of unexpected type %d from "
4347                     "contract %ld.\n", ct_event_get_type(e), ctid);
4348 
4349                 if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4350                         /* Allow unexpected critical events to be released. */
4351                         contract_ack(e);
4352         }
4353 
4354         ct_event_free(e);
4355 }
4356 
4357 /*
4358  * svc.startd(1M) Management
4359  */
4360 
4361 /*
4362  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4363  * contract, or 0 if we're starting it for the first time.  If wait is true
4364  * we'll wait for and return the exit value of the child.
4365  */
4366 static int
4367 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4368 {
4369         int err, i, ret, did_activate;
4370         pid_t pid;
4371         struct stat sb;
4372 
4373         if (cline[0] == '\0')
4374                 return (-1);
4375 
4376         /*
4377          * Don't restart startd if the system is rebooting or shutting down.
4378          */
4379         do {
4380                 ret = stat("/etc/svc/volatile/resetting", &sb);
4381         } while (ret == -1 && errno == EINTR);
4382 
4383         if (ret == 0) {
4384                 if (smf_debug)
4385                         console(B_TRUE, "Quiescing for reboot.\n");
4386                 (void) pause();
4387                 return (-1);
4388         }
4389 
4390         err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4391         if (err == EINVAL) {
4392                 console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4393                 tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4394                     CT_PR_EV_HWERR, STARTD_COOKIE);
4395 
4396                 err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4397         }
4398         if (err != 0) {
4399                 console(B_TRUE,
4400                     "Couldn't set transfer parameter of contract template: "
4401                     "%s.\n", strerror(err));
4402         }
4403 
4404         if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4405             SCF_SERVICE_STARTD)) != 0)
4406                 console(B_TRUE,
4407                     "Can not set svc_fmri in contract template: %s\n",
4408                     strerror(err));
4409         if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4410             startd_svc_aux)) != 0)
4411                 console(B_TRUE,
4412                     "Can not set svc_aux in contract template: %s\n",
4413                     strerror(err));
4414         did_activate = !(ct_tmpl_activate(tmpl));
4415         if (!did_activate)
4416                 console(B_TRUE,
4417                     "Template activation failed; not starting \"%s\" in "
4418                     "proper contract.\n", cline);
4419 
4420         /* Hold SIGCLD so we can wait if necessary. */
4421         (void) sighold(SIGCLD);
4422 
4423         while ((pid = fork()) < 0) {
4424                 if (errno == EPERM) {
4425                         console(B_TRUE, "Insufficient permission to fork.\n");
4426 
4427                         /* Now that's a doozy. */
4428                         exit(1);
4429                 }
4430 
4431                 console(B_TRUE,
4432                     "fork() for svc.startd failed: %s.  Will retry in 1 "
4433                     "second...\n", strerror(errno));
4434 
4435                 (void) sleep(1);
4436 
4437                 /* Eventually give up? */
4438         }
4439 
4440         if (pid == 0) {
4441                 /* child */
4442 
4443                 /* See the comment in efork() */
4444                 for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4445                         if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4446                                 (void) sigset(i, SIG_IGN);
4447                         else
4448                                 (void) sigset(i, SIG_DFL);
4449                 }
4450 
4451                 if (smf_options != NULL) {
4452                         /* Put smf_options in the environment. */
4453                         glob_envp[glob_envn] =
4454                             malloc(sizeof ("SMF_OPTIONS=") - 1 +
4455                             strlen(smf_options) + 1);
4456 
4457                         if (glob_envp[glob_envn] != NULL) {
4458                                 /* LINTED */
4459                                 (void) sprintf(glob_envp[glob_envn],
4460                                     "SMF_OPTIONS=%s", smf_options);
4461                                 glob_envp[glob_envn+1] = NULL;
4462                         } else {
4463                                 console(B_TRUE,
4464                                     "Could not set SMF_OPTIONS (%s).\n",
4465                                     strerror(errno));
4466                         }
4467                 }
4468 
4469                 if (smf_debug)
4470                         console(B_TRUE, "Executing svc.startd\n");
4471 
4472                 (void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4473 
4474                 console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4475                     strerror(errno));
4476 
4477                 exit(1);
4478         }
4479 
4480         /* parent */
4481 
4482         if (did_activate) {
4483                 if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4484                         (void) ct_tmpl_clear(tmpl);
4485         }
4486 
4487         /* Clear the old_ctid reference so the kernel can reclaim it. */
4488         if (old_ctid != 0)
4489                 (void) ct_pr_tmpl_set_transfer(tmpl, 0);
4490 
4491         (void) sigrelse(SIGCLD);
4492 
4493         return (0);
4494 }
4495 
4496 /*
4497  * void startd_record_failure(void)
4498  *   Place the current time in our circular array of svc.startd failures.
4499  */
4500 void
4501 startd_record_failure()
4502 {
4503         int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4504 
4505         startd_failure_time[index] = gethrtime();
4506 }
4507 
4508 /*
4509  * int startd_failure_rate_critical(void)
4510  *   Return true if the average failure interval is less than the permitted
4511  *   interval.  Implicit success if insufficient measurements for an average
4512  *   exist.
4513  */
4514 int
4515 startd_failure_rate_critical()
4516 {
4517         int n = startd_failure_index;
4518         hrtime_t avg_ns = 0;
4519 
4520         if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4521                 return (0);
4522 
4523         avg_ns =
4524             (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4525             startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4526             NSTARTD_FAILURE_TIMES;
4527 
4528         return (avg_ns < STARTD_FAILURE_RATE_NS);
4529 }
4530 
4531 /*
4532  * returns string that must be free'd
4533  */
4534 
4535 static char
4536 *audit_boot_msg()
4537 {
4538         char            *b, *p;
4539         char            desc[] = "booted";
4540         zoneid_t        zid = getzoneid();
4541 
4542         b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4543         if (b == NULL)
4544                 return (b);
4545 
4546         p = b;
4547         p += strlcpy(p, desc, sizeof (desc));
4548         if (zid != GLOBAL_ZONEID) {
4549                 p += strlcpy(p, ": ", 3);
4550                 (void) getzonenamebyid(zid, p, MAXNAMELEN);
4551         }
4552         return (b);
4553 }
4554 
4555 /*
4556  * Generate AUE_init_solaris audit record.  Return 1 if
4557  * auditing is enabled in case the caller cares.
4558  *
4559  * In the case of userint() or a local zone invocation of
4560  * one_true_init, the process initially contains the audit
4561  * characteristics of the process that invoked init.  The first pass
4562  * through here uses those characteristics then for the case of
4563  * one_true_init in a local zone, clears them so subsequent system
4564  * state changes won't be attributed to the person who booted the
4565  * zone.
4566  */
4567 static int
4568 audit_put_record(int pass_fail, int status, char *msg)
4569 {
4570         adt_session_data_t      *ah;
4571         adt_event_data_t        *event;
4572 
4573         if (!adt_audit_enabled())
4574                 return (0);
4575 
4576         /*
4577          * the PROC_DATA picks up the context to tell whether this is
4578          * an attributed record (auid = -2 is unattributed)
4579          */
4580         if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4581                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4582                 return (1);
4583         }
4584         event = adt_alloc_event(ah, ADT_init_solaris);
4585         if (event == NULL) {
4586                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4587                 (void) adt_end_session(ah);
4588                 return (1);
4589         }
4590         event->adt_init_solaris.info = msg;  /* NULL is ok here */
4591 
4592         if (adt_put_event(event, pass_fail, status)) {
4593                 console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4594                 (void) adt_end_session(ah);
4595                 return (1);
4596         }
4597         adt_free_event(event);
4598 
4599         (void) adt_end_session(ah);
4600 
4601         return (1);
4602 }