1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  * Copyright (c) 2014 Gary Mills
  25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright 2016 Joyent, Inc.
  27  */
  28 
  29 /*
  30  * zlogin provides five types of login which allow users in the global
  31  * zone to access non-global zones.
  32  *
  33  * - "interactive login" is similar to rlogin(1); for example, the user could
  34  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  35  *   granted a new pty (which is then shoved into the zone), and an I/O
  36  *   loop between parent and child processes takes care of the interactive
  37  *   session.  In this mode, login(1) (and its -c option, which means
  38  *   "already authenticated") is employed to take care of the initialization
  39  *   of the user's session.
  40  *
  41  * - "non-interactive login" is similar to su(1M); the user could issue
  42  *   'zlogin my-zone ls -l' and the command would be run as specified.
  43  *   In this mode, zlogin sets up pipes as the communication channel, and
  44  *   'su' is used to do the login setup work.
  45  *
  46  * - "interactive command" is a combination of the above two modes where
  47  *   a command is provided like the non-interactive case, but the -i option is
  48  *   also provided to make things interactive. For example, the user could
  49  *   issue 'zlogin -i my-zone /bin/sh'. In this mode neither 'login -c' nor
  50  *   'su root -c' is prepended to the command invocation. Because of this
  51  *   there will be no wtmpx login record within the zone.
  52  *
  53  * - "console login" is the equivalent to accessing the tip line for a
  54  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  55  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  56  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  57  *   console to be available anytime the zone is installed, regardless of
  58  *   whether it is running.
  59  *
  60  * - "standalone-process interactive" is specified with -I and connects to
  61  *   the zone's stdin, stdout and stderr zfd(7D) devices.
  62  */
  63 
  64 #include <sys/socket.h>
  65 #include <sys/termios.h>
  66 #include <sys/utsname.h>
  67 #include <sys/stat.h>
  68 #include <sys/types.h>
  69 #include <sys/contract/process.h>
  70 #include <sys/ctfs.h>
  71 #include <sys/brand.h>
  72 #include <sys/wait.h>
  73 #include <alloca.h>
  74 #include <assert.h>
  75 #include <ctype.h>
  76 #include <paths.h>
  77 #include <door.h>
  78 #include <errno.h>
  79 #include <nss_dbdefs.h>
  80 #include <poll.h>
  81 #include <priv.h>
  82 #include <pwd.h>
  83 #include <unistd.h>
  84 #include <utmpx.h>
  85 #include <sac.h>
  86 #include <signal.h>
  87 #include <stdarg.h>
  88 #include <stdio.h>
  89 #include <stdlib.h>
  90 #include <string.h>
  91 #include <strings.h>
  92 #include <stropts.h>
  93 #include <wait.h>
  94 #include <zone.h>
  95 #include <fcntl.h>
  96 #include <libdevinfo.h>
  97 #include <libintl.h>
  98 #include <locale.h>
  99 #include <libzonecfg.h>
 100 #include <libcontract.h>
 101 #include <libbrand.h>
 102 #include <auth_list.h>
 103 #include <auth_attr.h>
 104 #include <secdb.h>
 105 
/*
 * File-scope state shared by the routines below.
 */

/* Master side of the pty opened by get_master_pty(); -1 until opened. */
static int masterfd = -1;
/*
 * Control socket used by send_ctl_sock(); while it is -1, send_ctl_sock()
 * fails immediately and sigwinch() falls back to an ioctl on masterfd.
 */
static int ctlfd = -1;
/* Terminal settings captured from the user's terminal; see reset_tty(). */
static struct termios save_termios;
/*
 * Raw-mode settings with VEOF/VEOL taken from save_termios; built by
 * set_tty_rawmode() and consulted by process_user_input().
 */
static struct termios effective_termios;
/* The fd whose settings were stashed in save_termios by set_tty_rawmode(). */
static int save_fd;
/* Window size read from stdin in get_master_pty(). */
static struct winsize winsize;
/* Set to 1 by sigcld() once child_pid has exited or been killed. */
static volatile int dead;
/*
 * The child watched by sigcld() and signalled (as a process group) by
 * sig_forward(); -1 means "none".  It is assigned outside this part of the
 * file.
 */
static volatile pid_t child_pid = -1;
/* Consulted by prefork_dropprivs() to choose which privileges are kept. */
static int interactive = 0;
/* Built by prefork_dropprivs(), applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;
/* Flag word toggled by sigusr1() and sent to the control socket. */
static unsigned int connect_flags = 0;

/* Non-zero: process_user_input() does not look for the cmdchar escape. */
static int nocmdchar = 0;
/* Non-zero: init_slave_pty() tolerates module push and termios failures. */
static int failsafe = 0;
/* Escape character recognized at the beginning of a line. */
static char cmdchar = '~';
/* NOTE(review): not used in the visible part of the file. */
static int quiet = 0;
/* NOTE(review): presumably the target zone's brand name; set elsewhere. */
static char zonebrand[MAXNAMELEN];

/* NOTE(review): not used in the visible part of the file. */
static int pollerr = 0;

/* Program name set by getpname(); prefixes every diagnostic. */
static const char *pname;
/* NOTE(review): presumably the login name given with -l; set elsewhere. */
static char *username;

/*
 * When forced_login is true, the user is not prompted
 * for an authentication password in the target zone.
 */
static boolean_t forced_login = B_FALSE;
 134 
/* Fallback message-catalog domain name when the build does not supply one. */
#if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
#define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
#endif

/*
 * Program paths and PATH values.  Their consumers lie outside this part of
 * the file.  NOTE(review): presumably SUPATH1/SUPATH2 are the candidate
 * locations of su inside the zone, and LX_DEF_PATH is the PATH used for
 * lx-branded zones -- confirm against the callers.
 */
#define SUPATH1 "/usr/bin/su"
#define SUPATH2 "/bin/su"
#define FAILSAFESHELL   "/sbin/sh"
#define DEFAULTSHELL    "/sbin/sh"
#define DEF_PATH        "/usr/sbin:/usr/bin"
#define LX_DEF_PATH     "/bin:/usr/sbin:/usr/bin"

/* NOTE(review): a retry limit; what is being retried is not visible here. */
#define MAX_RETRY       30

/* NOTE(review): a brand name; presumably compared with zonebrand -- confirm. */
#define CLUSTER_BRAND_NAME      "cluster"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define ZLOGIN_BUFSIZ   8192
#define ZLOGIN_RDBUFSIZ 1024
#define HI_WATER        8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define CANONIFY_LEN 5
 169 
 170 static void
 171 usage(void)
 172 {
 173         (void) fprintf(stderr, gettext("usage: %s [-dinCEINQS] [-e cmdchar] "
 174             "[-l user] zonename [command [args ...] ]\n"), pname);
 175         exit(2);
 176 }
 177 
 178 static const char *
 179 getpname(const char *arg0)
 180 {
 181         const char *p = strrchr(arg0, '/');
 182 
 183         if (p == NULL)
 184                 p = arg0;
 185         else
 186                 p++;
 187 
 188         pname = p;
 189         return (p);
 190 }
 191 
 192 static void
 193 zerror(const char *fmt, ...)
 194 {
 195         va_list alist;
 196 
 197         (void) fprintf(stderr, "%s: ", pname);
 198         va_start(alist, fmt);
 199         (void) vfprintf(stderr, fmt, alist);
 200         va_end(alist);
 201         (void) fprintf(stderr, "\n");
 202 }
 203 
 204 static void
 205 zperror(const char *str)
 206 {
 207         const char *estr;
 208 
 209         if ((estr = strerror(errno)) != NULL)
 210                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 211         else
 212                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 213 }
 214 
 215 /*
 216  * The first part of our privilege dropping scheme needs to be called before
 217  * fork(), since we must have it for security; we don't want to be surprised
 218  * later that we couldn't allocate the privset.
 219  */
 220 static int
 221 prefork_dropprivs()
 222 {
 223         if ((dropprivs = priv_allocset()) == NULL)
 224                 return (1);
 225 
 226         priv_basicset(dropprivs);
 227         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 228         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 229         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 230         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 231 
 232         /*
 233          * We need to keep the basic privilege PROC_SESSION and all unknown
 234          * basic privileges as well as the privileges PROC_ZONE and
 235          * PROC_OWNER in order to query session information and
 236          * send signals.
 237          */
 238         if (interactive == 0) {
 239                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 240                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 241         } else {
 242                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 243         }
 244 
 245         return (0);
 246 }
 247 
 248 /*
 249  * The second part of the privilege drop.  We are paranoid about being attacked
 250  * by the zone, so we drop all privileges.  This should prevent a compromise
 251  * which gets us to fork(), exec(), symlink(), etc.
 252  */
 253 static void
 254 postfork_dropprivs()
 255 {
 256         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 257                 zperror(gettext("Warning: could not set permitted privileges"));
 258         }
 259         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 260                 zperror(gettext("Warning: could not set limit privileges"));
 261         }
 262         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 263                 zperror(gettext("Warning: could not set inheritable "
 264                     "privileges"));
 265         }
 266 }
 267 
 268 static int
 269 connect_zone_sock(const char *zname, const char *suffix, boolean_t verbose)
 270 {
 271         int sockfd = -1;
 272         struct sockaddr_un servaddr;
 273 
 274         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 275                 if (verbose)
 276                         zperror(gettext("could not create socket"));
 277                 return (-1);
 278         }
 279 
 280         bzero(&servaddr, sizeof (servaddr));
 281         servaddr.sun_family = AF_UNIX;
 282         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 283             "%s/%s.%s", ZONES_TMPDIR, zname, suffix);
 284         if (connect(sockfd, (struct sockaddr *)&servaddr,
 285             sizeof (servaddr)) == -1) {
 286                 if (verbose)
 287                         zperror(gettext("Could not connect to zone"));
 288                 (void) close(sockfd);
 289                 return (-1);
 290         }
 291         return (sockfd);
 292 }
 293 
 294 
 295 static int
 296 handshake_zone_sock(int sockfd, unsigned int flags)
 297 {
 298         char clientid[MAXPATHLEN];
 299         char handshake[MAXPATHLEN], c;
 300         int msglen;
 301         int i = 0, err = 0;
 302 
 303         msglen = snprintf(clientid, sizeof (clientid), "IDENT %s %u\n",
 304             setlocale(LC_MESSAGES, NULL), flags);
 305 
 306         if (msglen >= sizeof (clientid) || msglen < 0) {
 307                 zerror("protocol error");
 308                 return (-1);
 309         }
 310 
 311         if (write(sockfd, clientid, msglen) != msglen) {
 312                 zerror("protocol error");
 313                 return (-1);
 314         }
 315 
 316         /*
 317          * Take care not to accumulate more than our fill, and leave room for
 318          * the NUL at the end.
 319          */
 320         bzero(handshake, sizeof (handshake));
 321         while ((err = read(sockfd, &c, 1)) == 1) {
 322                 if (i >= (sizeof (handshake) - 1))
 323                         break;
 324                 if (c == '\n')
 325                         break;
 326                 handshake[i] = c;
 327                 i++;
 328         }
 329 
 330         /*
 331          * If something went wrong during the handshake we bail.
 332          * Perhaps the server died off.
 333          */
 334         if (err == -1) {
 335                 zperror(gettext("Could not connect to zone"));
 336                 return (-1);
 337         }
 338 
 339         if (strncmp(handshake, "OK", sizeof (handshake)) != 0) {
 340                 zerror(gettext("Zone is already in use by process ID %s."),
 341                     handshake);
 342                 return (-1);
 343         }
 344 
 345         return (0);
 346 }
 347 
 348 static int
 349 send_ctl_sock(const char *buf, size_t len)
 350 {
 351         char rbuf[BUFSIZ];
 352         int i;
 353         if (ctlfd == -1) {
 354                 return (-1);
 355         }
 356         if (write(ctlfd, buf, len) != len) {
 357                 return (-1);
 358         }
 359         /* read the response */
 360         for (i = 0; i < (BUFSIZ - 1); i++) {
 361                 char c;
 362                 if (read(ctlfd, &c, 1) != 1 || c == '\n' || c == '\0') {
 363                         break;
 364                 }
 365                 rbuf[i] = c;
 366         }
 367         rbuf[i+1] = '\0';
 368         if (strncmp("OK", rbuf, BUFSIZ) != 0) {
 369                 return (-1);
 370         }
 371         return (0);
 372 }
 373 /*
 374  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 375  * and forth between the two terminals.  We also compute and store the
 376  * name of the slave terminal associated with the master side.
 377  */
 378 static int
 379 get_master_pty()
 380 {
 381         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 382                 zperror(gettext("failed to obtain a pseudo-tty"));
 383                 return (-1);
 384         }
 385         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 386                 zperror(gettext("failed to get terminal settings from stdin"));
 387                 return (-1);
 388         }
 389         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 390 
 391         return (0);
 392 }
 393 
 394 /*
 395  * This is a bit tricky; normally a pts device will belong to the zone it
 396  * is granted to.  But in the case of "entering" a zone, we need to establish
 397  * the pty before entering the zone so that we can vector I/O to and from it
 398  * from the global zone.
 399  *
 400  * We use the zonept() call to let the ptm driver know what we are up to;
 401  * the only other hairy bit is the setting of zoneslavename (which happens
 402  * above, in get_master_pty()).
 403  */
 404 static int
 405 init_slave_pty(zoneid_t zoneid, char *devroot)
 406 {
 407         int slavefd = -1;
 408         char *slavename, zoneslavename[MAXPATHLEN];
 409 
 410         /*
 411          * Set slave permissions, zone the pts, then unlock it.
 412          */
 413         if (grantpt(masterfd) != 0) {
 414                 zperror(gettext("grantpt failed"));
 415                 return (-1);
 416         }
 417 
 418         if (unlockpt(masterfd) != 0) {
 419                 zperror(gettext("unlockpt failed"));
 420                 return (-1);
 421         }
 422 
 423         /*
 424          * We must open the slave side before zoning this pty; otherwise
 425          * the kernel would refuse us the open-- zoning a pty makes it
 426          * inaccessible to the global zone.  Note we are trying to open
 427          * the device node via the $ZONEROOT/dev path for this pty.
 428          *
 429          * Later we'll close the slave out when once we've opened it again
 430          * from within the target zone.  Blarg.
 431          */
 432         if ((slavename = ptsname(masterfd)) == NULL) {
 433                 zperror(gettext("failed to get name for pseudo-tty"));
 434                 return (-1);
 435         }
 436 
 437         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 438             devroot, slavename);
 439 
 440         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 441                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 442                     strerror(errno));
 443                 return (-1);
 444         }
 445 
 446         /*
 447          * Push hardware emulation (ptem), line discipline (ldterm),
 448          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 449          */
 450         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 451                 zperror(gettext("failed to push ptem module"));
 452                 if (!failsafe)
 453                         goto bad;
 454         }
 455 
 456         /*
 457          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 458          * this prior to entering the zone so that we can detect any errors
 459          * early, and so that we can set the anchor from the global zone.
 460          */
 461         if (ioctl(slavefd, I_ANCHOR) == -1) {
 462                 zperror(gettext("failed to set stream anchor"));
 463                 if (!failsafe)
 464                         goto bad;
 465         }
 466 
 467         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 468                 zperror(gettext("failed to push ldterm module"));
 469                 if (!failsafe)
 470                         goto bad;
 471         }
 472         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 473                 zperror(gettext("failed to push ttcompat module"));
 474                 if (!failsafe)
 475                         goto bad;
 476         }
 477 
 478         /*
 479          * Propagate terminal settings from the external term to the new one.
 480          */
 481         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 482                 zperror(gettext("failed to set terminal settings"));
 483                 if (!failsafe)
 484                         goto bad;
 485         }
 486         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 487 
 488         if (zonept(masterfd, zoneid) != 0) {
 489                 zperror(gettext("could not set zoneid of pty"));
 490                 goto bad;
 491         }
 492 
 493         return (slavefd);
 494 
 495 bad:
 496         (void) close(slavefd);
 497         return (-1);
 498 }
 499 
 500 /*
 501  * Place terminal into raw mode.
 502  */
 503 static int
 504 set_tty_rawmode(int fd)
 505 {
 506         struct termios term;
 507         if (tcgetattr(fd, &term) < 0) {
 508                 zperror(gettext("failed to get user terminal settings"));
 509                 return (-1);
 510         }
 511 
 512         /* Stash for later, so we can revert back to previous mode */
 513         save_termios = term;
 514         save_fd = fd;
 515 
 516         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 517         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 518         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 519         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 520         /* disable output post-processing */
 521         term.c_oflag &= ~OPOST;
 522         /* disable canonical mode, signal chars, echo & extended functions */
 523         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 524 
 525         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 526         term.c_cc[VTIME] = 0;
 527 
 528         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 529                 zperror(gettext("failed to set user terminal to raw mode"));
 530                 return (-1);
 531         }
 532 
 533         /*
 534          * We need to know the value of VEOF so that we can properly process for
 535          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 536          * because VMIN overloads the same array slot in non-canonical mode.
 537          * Stupid @&^%!
 538          *
 539          * So here we construct the "effective" termios from the current
 540          * terminal settings, and the corrected VEOF and VEOL settings.
 541          */
 542         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 543                 zperror(gettext("failed to get user terminal settings"));
 544                 return (-1);
 545         }
 546         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 547         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 548 
 549         return (0);
 550 }
 551 
 552 /*
 553  * Copy terminal window size from our terminal to the pts.
 554  */
 555 /*ARGSUSED*/
 556 static void
 557 sigwinch(int s)
 558 {
 559         struct winsize ws;
 560 
 561         if (ioctl(0, TIOCGWINSZ, &ws) == 0) {
 562                 if (ctlfd != -1) {
 563                         char buf[BUFSIZ];
 564                         (void) snprintf(buf, sizeof (buf),
 565                             "TIOCSWINSZ %hu %hu\n", ws.ws_row, ws.ws_col);
 566                         (void) send_ctl_sock(buf, strlen(buf));
 567                 } else {
 568                         (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 569                 }
 570         }
 571 }
 572 
 573 /*
 574  * Toggle zfd EOF mode and notify zoneadmd
 575  */
 576 /*ARGSUSED*/
 577 static void
 578 sigusr1(int s)
 579 {
 580         connect_flags ^= ZLOGIN_ZFD_EOF;
 581         if (ctlfd != -1) {
 582                 char buf[BUFSIZ];
 583                 (void) snprintf(buf, sizeof (buf), "SETFLAGS %u\n",
 584                     connect_flags);
 585                 (void) send_ctl_sock(buf, strlen(buf));
 586         }
 587 }
 588 
 589 static volatile int close_on_sig = -1;
 590 
 591 static void
 592 /*ARGSUSED*/
 593 sigcld(int s)
 594 {
 595         int status;
 596         pid_t pid;
 597 
 598         /*
 599          * Peek at the exit status.  If this isn't the process we cared
 600          * about, then just reap it.
 601          */
 602         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 603                 if (pid == child_pid &&
 604                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 605                         dead = 1;
 606                         if (close_on_sig != -1) {
 607                                 (void) write(close_on_sig, "a", 1);
 608                                 (void) close(close_on_sig);
 609                                 close_on_sig = -1;
 610                         }
 611                 } else {
 612                         (void) waitpid(pid, &status, WNOHANG);
 613                 }
 614         }
 615 }
 616 
 617 /*
 618  * Some signals (currently, SIGINT) must be forwarded on to the process
 619  * group of the child process.
 620  */
 621 static void
 622 sig_forward(int s)
 623 {
 624         if (child_pid != -1) {
 625                 (void) sigsend(P_PGID, child_pid, s);
 626         }
 627 }
 628 
 629 /*
 630  * reset terminal settings for global environment
 631  */
 632 static void
 633 reset_tty()
 634 {
 635         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 636 }
 637 
 638 /*
 639  * Convert character to printable representation, for display with locally
 640  * echoed command characters (like when we need to display ~^D)
 641  */
 642 static void
 643 canonify(char c, char *cc)
 644 {
 645         if (isprint(c)) {
 646                 cc[0] = c;
 647                 cc[1] = '\0';
 648         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 649                 cc[0] = '^';
 650                 cc[1] = c + '@';
 651                 cc[2] = '\0';
 652         } else {
 653                 cc[0] = '\\';
 654                 cc[1] = ((c >> 6) & 7) + '0';
 655                 cc[2] = ((c >> 3) & 7) + '0';
 656                 cc[3] = (c & 7) + '0';
 657                 cc[4] = '\0';
 658         }
 659 }
 660 
 661 /*
 662  * process_user_input watches the input stream for the escape sequence for
 663  * 'quit' (by default, tilde-period).  Because we might be fed just one
 664  * keystroke at a time, state associated with the user input (are we at the
 665  * beginning of the line?  are we locally echoing the next character?) is
 666  * maintained by beginning_of_line and local_echo across calls to the routine.
 667  * If the write to outfd fails, we'll try to read from infd in an attempt
 668  * to prevent deadlock between the two processes.
 669  *
 670  * This routine returns -1 when the 'quit' escape sequence has been issued,
 671  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 672  */
 673 static int
 674 process_user_input(int outfd, int infd)
 675 {
 676         static boolean_t beginning_of_line = B_TRUE;
 677         static boolean_t local_echo = B_FALSE;
 678         char ibuf[ZLOGIN_BUFSIZ];
 679         int nbytes;
 680         char *buf = ibuf;
 681         char c = *buf;
 682 
 683         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 684         if (nbytes == -1 && (errno != EINTR || dead))
 685                 return (-1);
 686 
 687         if (nbytes == -1)       /* The read was interrupted. */
 688                 return (0);
 689 
 690         /* 0 read means EOF, close the pipe to the child */
 691         if (nbytes == 0)
 692                 return (1);
 693 
 694         for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
 695                 buf++;
 696                 if (beginning_of_line && !nocmdchar) {
 697                         beginning_of_line = B_FALSE;
 698                         if (c == cmdchar) {
 699                                 local_echo = B_TRUE;
 700                                 continue;
 701                         }
 702                 } else if (local_echo) {
 703                         local_echo = B_FALSE;
 704                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 705                                 char cc[CANONIFY_LEN];
 706 
 707                                 canonify(c, cc);
 708                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 709                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 710                                 return (-1);
 711                         }
 712                 }
 713 retry:
 714                 if (write(outfd, &c, 1) <= 0) {
 715                         /*
 716                          * Since the fd we are writing to is opened with
 717                          * O_NONBLOCK it is possible to get EAGAIN if the
 718                          * pipe is full.  One way this could happen is if we
 719                          * are writing a lot of data into the pipe in this loop
 720                          * and the application on the other end is echoing that
 721                          * data back out to its stdout.  The output pipe can
 722                          * fill up since we are stuck here in this loop and not
 723                          * draining the other pipe.  We can try to read some of
 724                          * the data to see if we can drain the pipe so that the
 725                          * application can continue to make progress.  The read
 726                          * is non-blocking so we won't hang here.  We also wait
 727                          * a bit before retrying since there could be other
 728                          * reasons why the pipe is full and we don't want to
 729                          * continuously retry.
 730                          */
 731                         if (errno == EAGAIN) {
 732                                 struct timespec rqtp;
 733                                 int ln;
 734                                 char obuf[ZLOGIN_BUFSIZ];
 735 
 736                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 737                                         (void) write(STDOUT_FILENO, obuf, ln);
 738 
 739                                 /* sleep for 10 milliseconds */
 740                                 rqtp.tv_sec = 0;
 741                                 rqtp.tv_nsec = MSEC2NSEC(10);
 742                                 (void) nanosleep(&rqtp, NULL);
 743                                 if (!dead)
 744                                         goto retry;
 745                         }
 746 
 747                         return (-1);
 748                 }
 749                 beginning_of_line = (c == '\r' || c == '\n' ||
 750                     c == effective_termios.c_cc[VKILL] ||
 751                     c == effective_termios.c_cc[VEOL] ||
 752                     c == effective_termios.c_cc[VSUSP] ||
 753                     c == effective_termios.c_cc[VINTR]);
 754         }
 755         return (0);
 756 }
 757 
 758 /*
 759  * This function prevents deadlock between zlogin and the application in the
 760  * zone that it is talking to.  This can happen when we read from zlogin's
 761  * stdin and write the data down the pipe to the application.  If the pipe
 762  * is full, we'll block in the write.  Because zlogin could be blocked in
 763  * the write, it would never read the application's stdout/stderr so the
 764  * application can then block on those writes (when the pipe fills up).  If the
 765  * the application gets blocked this way, it can never get around to reading
 766  * its stdin so that zlogin can unblock from its write.  Once in this state,
 767  * the two processes are deadlocked.
 768  *
 769  * To prevent this, we want to verify that we can write into the pipe before we
 770  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 771  * for now.  We'll circle back here again after the poll() so that we can
 772  * try again.  When this function is called, we already know there is data
 773  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 774  * stdin is EOF, and 0 if everything is ok (even though we might not have
 775  * read/written any data into the pipe on this iteration).
 776  */
 777 static int
 778 process_raw_input(int stdin_fd, int appin_fd)
 779 {
 780         int cc;
 781         struct stat64 sb;
 782         char ibuf[ZLOGIN_RDBUFSIZ];
 783 
 784         /* Check how much data is already in the pipe */
 785         if (fstat64(appin_fd, &sb) == -1) {
 786                 perror("stat failed");
 787                 return (-1);
 788         }
 789 
 790         if (dead)
 791                 return (-1);
 792 
 793         /*
 794          * The pipe already has a lot of data in it,  don't write any more
 795          * right now.
 796          */
 797         if (sb.st_size >= HI_WATER)
 798                 return (0);
 799 
 800         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 801         if (cc == -1 && (errno != EINTR || dead))
 802                 return (-1);
 803 
 804         if (cc == -1)   /* The read was interrupted. */
 805                 return (0);
 806 
 807         /* 0 read means EOF, close the pipe to the child */
 808         if (cc == 0)
 809                 return (1);
 810 
 811         /*
 812          * stdin_fd is stdin of the target; so, the thing we'll write the user
 813          * data *to*.
 814          */
 815         if (write(stdin_fd, ibuf, cc) == -1)
 816                 return (-1);
 817 
 818         return (0);
 819 }
 820 
 821 /*
 822  * Write the output from the application running in the zone.  We can get
 823  * a signal during the write (usually it would be SIGCHLD when the application
 824  * has exited) so we loop to make sure we have written all of the data we read.
 825  */
 826 static int
 827 process_output(int in_fd, int out_fd)
 828 {
 829         int wrote = 0;
 830         int cc;
 831         char ibuf[ZLOGIN_BUFSIZ];
 832 
 833         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 834         if (cc == -1 && (errno != EINTR || dead))
 835                 return (-1);
 836         if (cc == 0)    /* EOF */
 837                 return (-1);
 838         if (cc == -1)   /* The read was interrupted. */
 839                 return (0);
 840 
 841         do {
 842                 int len;
 843 
 844                 len = write(out_fd, ibuf + wrote, cc - wrote);
 845                 if (len == -1 && errno != EINTR)
 846                         return (-1);
 847                 if (len != -1)
 848                         wrote += len;
 849         } while (wrote < cc);
 850 
 851         return (0);
 852 }
 853 
 854 /*
 855  * This is the main I/O loop, and is shared across all zlogin modes.
 856  * Parameters:
 857  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 858  *                 the zone will be written here.
 859  *
 860  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 861  *                 we're running non-interactive); used in process_raw_input
 862  *                 to ensure we don't fill up the application's stdin pipe.
 863  *
 864  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 865  *                 from the zone will arrive here.
 866  *
 867  *      stderr_fd: The fd representing 'stderr' for the slave side; output
 868  *                 from the zone will arrive here.
 869  *
 870  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 871  *                 be performed on the input coming from STDIN.
 872  *
 873  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 874  * mode supplies a stderr).
 875  *
 876  */
 877 static void
 878 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 879     boolean_t raw_mode)
 880 {
 881         struct pollfd pollfds[4];
 882         char ibuf[ZLOGIN_BUFSIZ];
 883         int cc, ret;
 884 
 885         /* read from stdout of zone and write to stdout of global zone */
 886         pollfds[0].fd = stdout_fd;
 887         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 888 
 889         /* read from stderr of zone and write to stderr of global zone */
 890         pollfds[1].fd = stderr_fd;
 891         pollfds[1].events = pollfds[0].events;
 892 
 893         /* read from stdin of global zone and write to stdin of zone */
 894         pollfds[2].fd = STDIN_FILENO;
 895         pollfds[2].events = pollfds[0].events;
 896 
 897         /* read from signalling pipe so we know when child dies */
 898         pollfds[3].fd = sig_fd;
 899         pollfds[3].events = pollfds[0].events;
 900 
 901         for (;;) {
 902                 pollfds[0].revents = pollfds[1].revents =
 903                     pollfds[2].revents = pollfds[3].revents = 0;
 904 
 905                 if (dead)
 906                         break;
 907 
 908                 /*
 909                  * There is a race condition here where we can receive the
 910                  * child death signal, set the dead flag, but since we have
 911                  * passed the test above, we would go into poll and hang.
 912                  * To avoid this we use the sig_fd as an additional poll fd.
 913                  * The signal handler writes into the other end of this pipe
 914                  * when the child dies so that the poll will always see that
 915                  * input and proceed.  We just loop around at that point and
 916                  * then notice the dead flag.
 917                  */
 918 
 919                 ret = poll(pollfds,
 920                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 921 
 922                 if (ret == -1 && errno != EINTR) {
 923                         perror("poll failed");
 924                         break;
 925                 }
 926 
 927                 if (errno == EINTR && dead) {
 928                         break;
 929                 }
 930 
 931                 /* event from master side stderr */
 932                 if (pollfds[1].revents) {
 933                         if (pollfds[1].revents &
 934                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 935                                 if (process_output(stderr_fd, STDERR_FILENO)
 936                                     != 0)
 937                                         break;
 938                         } else {
 939                                 pollerr = pollfds[1].revents;
 940                                 break;
 941                         }
 942                 }
 943 
 944                 /* event from master side stdout */
 945                 if (pollfds[0].revents) {
 946                         if (pollfds[0].revents &
 947                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 948                                 if (process_output(stdout_fd, STDOUT_FILENO)
 949                                     != 0)
 950                                         break;
 951                         } else {
 952                                 pollerr = pollfds[0].revents;
 953                                 break;
 954                         }
 955                 }
 956 
 957                 /* event from user STDIN side */
 958                 if (pollfds[2].revents) {
 959                         if (pollfds[2].revents &
 960                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 961                                 /*
 962                                  * stdin fd is stdin of the target; so,
 963                                  * the thing we'll write the user data *to*.
 964                                  *
 965                                  * Also, unlike on the output side, we
 966                                  * close the pipe on a zero-length message.
 967                                  */
 968                                 int res;
 969 
 970                                 if (raw_mode)
 971                                         res = process_raw_input(stdin_fd,
 972                                             appin_fd);
 973                                 else
 974                                         res = process_user_input(stdin_fd,
 975                                             stdout_fd);
 976 
 977                                 if (res < 0)
 978                                         break;
 979                                 if (res > 0) {
 980                                         /* EOF (close) child's stdin_fd */
 981                                         pollfds[2].fd = -1;
 982                                         while ((res = close(stdin_fd)) != 0 &&
 983                                             errno == EINTR)
 984                                                 ;
 985                                         if (res != 0)
 986                                                 break;
 987                                 }
 988 
 989                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 990                                 /*
 991                                  * It's OK to get a POLLHUP on STDIN-- it
 992                                  * always happens if you do:
 993                                  *
 994                                  * echo foo | zlogin <zone> <command>
 995                                  *
 996                                  * We reset fd to -1 in this case to clear
 997                                  * the condition and close the pipe (EOF) to
 998                                  * the other side in order to wrap things up.
 999                                  */
1000                                 int res;
1001 
1002                                 pollfds[2].fd = -1;
1003                                 while ((res = close(stdin_fd)) != 0 &&
1004                                     errno == EINTR)
1005                                         ;
1006                                 if (res != 0)
1007                                         break;
1008                         } else {
1009                                 pollerr = pollfds[2].revents;
1010                                 break;
1011                         }
1012                 }
1013         }
1014 
1015         /*
1016          * We are in the midst of dying, but try to poll with a short
1017          * timeout to see if we can catch the last bit of I/O from the
1018          * children.
1019          */
1020 retry:
1021         pollfds[0].revents = pollfds[1].revents = 0;
1022         (void) poll(pollfds, 2, 100);
1023         if (pollfds[0].revents &
1024             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
1025                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
1026                         (void) write(STDOUT_FILENO, ibuf, cc);
1027                         goto retry;
1028                 }
1029         }
1030         if (pollfds[1].revents &
1031             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
1032                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
1033                         (void) write(STDERR_FILENO, ibuf, cc);
1034                         goto retry;
1035                 }
1036         }
1037 }
1038 
1039 /*
1040  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
1041  */
1042 static const char *
1043 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
1044     size_t len)
1045 {
1046         bzero(user_cmd, sizeof (user_cmd));
1047         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
1048                 return (NULL);
1049 
1050         return (user_cmd);
1051 }
1052 
1053 /* From libc */
1054 extern int str2passwd(const char *, int, void *, char *, int);
1055 
1056 /*
1057  * exec() the user_cmd brand hook, and convert the output string to a
1058  * struct passwd.  This is to be called after zone_enter().
1059  *
1060  */
1061 static struct passwd *
1062 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
1063     int pwbuflen)
1064 {
1065         char pwline[NSS_BUFLEN_PASSWD];
1066         char *cin = NULL;
1067         FILE *fin;
1068         int status;
1069 
1070         assert(getzoneid() != GLOBAL_ZONEID);
1071 
1072         if ((fin = popen(user_cmd, "r")) == NULL)
1073                 return (NULL);
1074 
1075         while (cin == NULL && !feof(fin))
1076                 cin = fgets(pwline, sizeof (pwline), fin);
1077 
1078         if (cin == NULL) {
1079                 (void) pclose(fin);
1080                 return (NULL);
1081         }
1082 
1083         status = pclose(fin);
1084         if (!WIFEXITED(status))
1085                 return (NULL);
1086         if (WEXITSTATUS(status) != 0)
1087                 return (NULL);
1088 
1089         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1090                 return (pwent);
1091         else
1092                 return (NULL);
1093 }
1094 
1095 static char **
1096 zone_login_cmd(brand_handle_t bh, const char *login)
1097 {
1098         static char result_buf[ARG_MAX];
1099         char **new_argv, *ptr, *lasts;
1100         int n, a;
1101 
1102         /* Get the login command for the target zone. */
1103         bzero(result_buf, sizeof (result_buf));
1104 
1105         if (forced_login) {
1106                 if (brand_get_forcedlogin_cmd(bh, login,
1107                     result_buf, sizeof (result_buf)) != 0)
1108                         return (NULL);
1109         } else {
1110                 if (brand_get_login_cmd(bh, login,
1111                     result_buf, sizeof (result_buf)) != 0)
1112                         return (NULL);
1113         }
1114 
1115         /*
1116          * We got back a string that we'd like to execute.  But since
1117          * we're not doing the execution via a shell we'll need to convert
1118          * the exec string to an array of strings.  We'll do that here
1119          * but we're going to be very simplistic about it and break stuff
1120          * up based on spaces.  We're not even going to support any kind
1121          * of quoting or escape characters.  It's truly amazing that
1122          * there is no library function in Illumos to do this for us.
1123          */
1124 
1125         /*
1126          * Be paranoid.  Since we're deliniating based on spaces make
1127          * sure there are no adjacent spaces.
1128          */
1129         if (strstr(result_buf, "  ") != NULL)
1130                 return (NULL);
1131 
1132         /* Remove any trailing whitespace.  */
1133         n = strlen(result_buf);
1134         if (result_buf[n - 1] == ' ')
1135                 result_buf[n - 1] = '\0';
1136 
1137         /* Count how many elements there are in the exec string. */
1138         ptr = result_buf;
1139         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1140                 ;
1141 
1142         /* Allocate the argv array that we're going to return. */
1143         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1144                 return (NULL);
1145 
1146         /* Tokenize the exec string and return. */
1147         a = 0;
1148         new_argv[a++] = result_buf;
1149         if (n > 2) {
1150                 (void) strtok_r(result_buf, " ", &lasts);
1151                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1152                         ;
1153         } else {
1154                 new_argv[a++] = NULL;
1155         }
1156         assert(n == a);
1157         return (new_argv);
1158 }
1159 
1160 /*
1161  * Prepare argv array for exec'd process.  If commands are passed to the new
1162  * process and su(1M) is avalable, use it for the invocation.  Otherwise, use
1163  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1164  * login that we're coming from another zone, and to disregard its CONSOLE
1165  * checks).
1166  */
1167 static char **
1168 prep_args(brand_handle_t bh, char *zonename, const char *login, char **argv)
1169 {
1170         int argc = 0, i;
1171         size_t subshell_len = 1;
1172         char *subshell = NULL, *supath = NULL;
1173         char **new_argv = NULL;
1174 
1175         if (argv == NULL) {
1176                 if (failsafe) {
1177                         if ((new_argv = malloc(sizeof (char *) * 2)) == NULL)
1178                                 return (NULL);
1179                         new_argv[0] = FAILSAFESHELL;
1180                         new_argv[1] = NULL;
1181                 } else {
1182                         new_argv = zone_login_cmd(bh, login);
1183                 }
1184                 return (new_argv);
1185         }
1186 
1187         /*
1188          * Attempt to locate a 'su' binary if not using the failsafe shell.
1189          */
1190         if (!failsafe) {
1191                 struct stat sb;
1192                 char zonepath[MAXPATHLEN];
1193                 char supath_check[MAXPATHLEN];
1194 
1195                 if (zone_get_zonepath(zonename, zonepath,
1196                     sizeof (zonepath)) != Z_OK) {
1197                         zerror(gettext("unable to determine zone "
1198                             "path"));
1199                         return (NULL);
1200                 }
1201 
1202                 (void) snprintf(supath_check, sizeof (supath), "%s/root/%s",
1203                     zonepath, SUPATH1);
1204                 if (stat(supath_check, &sb) == 0) {
1205                         supath = SUPATH1;
1206                 } else {
1207                         (void) snprintf(supath_check, sizeof (supath_check),
1208                             "%s/root/%s", zonepath, SUPATH2);
1209                         if (stat(supath_check, &sb) == 0) {
1210                                 supath = SUPATH2;
1211                         }
1212                 }
1213         }
1214 
1215         /*
1216          * With no failsafe shell or supath to wrap the incoming command, the
1217          * arguments are passed straight through.
1218          */
1219         if (!failsafe && supath == NULL) {
1220                 /*
1221                  * Such an outcome is not acceptable, however, if the caller
1222                  * expressed a desire to switch users.
1223                  */
1224                 if (strcmp(login, "root") != 0) {
1225                         zerror(gettext("unable to find 'su' command"));
1226                         return (NULL);
1227                 }
1228                 return (argv);
1229         }
1230 
1231         /*
1232          * Inventory arguments and allocate a buffer to escape them for the
1233          * subshell.
1234          */
1235         while (argv[argc] != NULL) {
1236                 /*
1237                  * Allocate enough space for the delimiter and 2
1238                  * quotes which might be needed.
1239                  */
1240                 subshell_len += strlen(argv[argc]) + 3;
1241                 argc++;
1242         }
1243         if ((subshell = calloc(1, subshell_len)) == NULL) {
1244                 return (NULL);
1245         }
1246 
1247         /*
1248          * The handling of quotes in the following block may seem unusual, but
1249          * it is done this way for backward compatibility.
1250          * When running a command, zlogin is documented as:
1251          *    zlogin zonename command args
1252          * However, some code has come to depend on the following usage:
1253          *    zlogin zonename 'command args'
1254          * This relied on the fact that the single argument would be re-parsed
1255          * within the zone and excuted as a command with an argument. To remain
1256          * compatible with this (incorrect) usage, if there is only a single
1257          * argument, it is not quoted, even if it has embedded spaces.
1258          *
1259          * Here are two examples which both need to work:
1260          * 1) zlogin foo 'echo hello'
1261          *    This has a single argv member with a space in it but will not be
1262          *    quoted on the command passed into the zone.
1263          * 2) zlogin foo bash -c 'echo hello'
1264          *    This has 3 argv members. The 3rd arg has a space and must be
1265          *    quoted on the command passed into the zone.
1266          */
1267         for (i = 0; i < argc; i++) {
1268                 if (i > 0)
1269                         (void) strcat(subshell, " ");
1270 
1271                 if (argc > 1 && (strchr(argv[i], ' ') != NULL ||
1272                     strchr(argv[i], '\t') != NULL)) {
1273                         (void) strcat(subshell, "'");
1274                         (void) strcat(subshell, argv[i]);
1275                         (void) strcat(subshell, "'");
1276                 } else {
1277                         (void) strcat(subshell, argv[i]);
1278                 }
1279         }
1280 
1281         if (failsafe) {
1282                 int a = 0, n = 4;
1283 
1284                 if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1285                         return (NULL);
1286 
1287                 new_argv[a++] = FAILSAFESHELL;
1288                 new_argv[a++] = "-c";
1289                 new_argv[a++] = subshell;
1290                 new_argv[a++] = NULL;
1291                 assert(a == n);
1292         } else {
1293                 int a = 0, n = 6;
1294 
1295                 assert(supath != NULL);
1296                 if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1297                         return (NULL);
1298 
1299                 new_argv[a++] = supath;
1300                 if (strcmp(login, "root") != 0) {
1301                         new_argv[a++] = "-";
1302                 } else {
1303                         n--;
1304                 }
1305                 new_argv[a++] = (char *)login;
1306                 new_argv[a++] = "-c";
1307                 new_argv[a++] = subshell;
1308                 new_argv[a++] = NULL;
1309                 assert(a == n);
1310         }
1311 
1312         return (new_argv);
1313 }
1314 
/*
 * Helper routine for prep_env below: build a freshly allocated "name=value"
 * string.  Returns NULL if the allocation fails; otherwise the caller owns
 * the returned string.
 */
static char *
add_env(char *name, char *value)
{
        size_t namelen = strlen(name);
        size_t vallen = strlen(value);
        char *entry;

        /* name, '=', value, and the terminating NUL */
        entry = malloc(namelen + 1 + vallen + 1);
        if (entry != NULL) {
                (void) memcpy(entry, name, namelen);
                entry[namelen] = '=';
                /* Copying vallen + 1 bytes brings the NUL along. */
                (void) memcpy(entry + namelen + 1, value, vallen + 1);
        }

        return (entry);
}
1330 
1331 /*
1332  * Prepare envp array for exec'd process.
1333  */
1334 static char **
1335 prep_env()
1336 {
1337         int e = 0, size = 1;
1338         char **new_env, *estr;
1339         char *term = getenv("TERM");
1340         char *path;
1341 
1342         size++; /* for $PATH */
1343         if (term != NULL)
1344                 size++;
1345 
1346         /*
1347          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1348          * We also set $SHELL, since neither login nor su will be around to do
1349          * it.
1350          */
1351         if (failsafe)
1352                 size += 2;
1353 
1354         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1355                 return (NULL);
1356 
1357         if (strcmp(zonebrand, "lx") == 0)
1358                 path = LX_DEF_PATH;
1359         else
1360                 path = DEF_PATH;
1361 
1362         if ((estr = add_env("PATH", path)) == NULL)
1363                 return (NULL);
1364         new_env[e++] = estr;
1365 
1366         if (term != NULL) {
1367                 if ((estr = add_env("TERM", term)) == NULL)
1368                         return (NULL);
1369                 new_env[e++] = estr;
1370         }
1371 
1372         if (failsafe) {
1373                 if ((estr = add_env("HOME", "/")) == NULL)
1374                         return (NULL);
1375                 new_env[e++] = estr;
1376 
1377                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1378                         return (NULL);
1379                 new_env[e++] = estr;
1380         }
1381 
1382         new_env[e++] = NULL;
1383 
1384         assert(e == size);
1385 
1386         return (new_env);
1387 }
1388 
1389 /*
1390  * Finish the preparation of the envp array for exec'd non-interactive
1391  * zlogins.  This is called in the child process *after* we zone_enter(), since
1392  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1393  * etc.  We need only do this in the non-interactive, mode, since otherwise
1394  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1395  * additional ways in which the command could fail, and we'd prefer to avoid
1396  * that.
1397  */
1398 static char **
1399 prep_env_noninteractive(const char *user_cmd, char **env)
1400 {
1401         size_t size;
1402         char **new_env;
1403         int e, i;
1404         char *estr;
1405         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1406         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1407         struct passwd pwent;
1408         struct passwd *pw = NULL;
1409 
1410         assert(env != NULL);
1411         assert(failsafe == 0);
1412 
1413         /*
1414          * Exec the "user_cmd" brand hook to get a pwent for the
1415          * login user.  If this fails, HOME will be set to "/", SHELL
1416          * will be set to $DEFAULTSHELL, and we will continue to exec
1417          * SUPATH <login> -c <cmd>.
1418          */
1419         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1420 
1421         /*
1422          * Get existing envp size.
1423          */
1424         for (size = 0; env[size] != NULL; size++)
1425                 ;
1426 
1427         e = size;
1428 
1429         /*
1430          * Finish filling out the environment; we duplicate the environment
1431          * setup described in login(1), for lack of a better precedent.
1432          */
1433         if (pw != NULL)
1434                 size += 3;      /* LOGNAME, HOME, MAIL */
1435         else
1436                 size += 1;      /* HOME */
1437 
1438         size++; /* always fill in SHELL */
1439         size++; /* terminating NULL */
1440 
1441         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1442                 goto malloc_fail;
1443 
1444         /*
1445          * Copy existing elements of env into new_env.
1446          */
1447         for (i = 0; env[i] != NULL; i++) {
1448                 if ((new_env[i] = strdup(env[i])) == NULL)
1449                         goto malloc_fail;
1450         }
1451         assert(e == i);
1452 
1453         if (pw != NULL) {
1454                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1455                         goto malloc_fail;
1456                 new_env[e++] = estr;
1457 
1458                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1459                         goto malloc_fail;
1460                 new_env[e++] = estr;
1461 
1462                 if (chdir(pw->pw_dir) != 0)
1463                         zerror(gettext("Could not chdir to home directory "
1464                             "%s: %s"), pw->pw_dir, strerror(errno));
1465 
1466                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1467                     pw->pw_name);
1468                 if ((estr = add_env("MAIL", varmail)) == NULL)
1469                         goto malloc_fail;
1470                 new_env[e++] = estr;
1471         } else {
1472                 if ((estr = add_env("HOME", "/")) == NULL)
1473                         goto malloc_fail;
1474                 new_env[e++] = estr;
1475         }
1476 
1477         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1478                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1479                         goto malloc_fail;
1480                 new_env[e++] = estr;
1481         } else {
1482                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1483                         goto malloc_fail;
1484                 new_env[e++] = estr;
1485         }
1486 
1487         new_env[e++] = NULL;    /* add terminating NULL */
1488 
1489         assert(e == size);
1490         return (new_env);
1491 
1492 malloc_fail:
1493         zperror(gettext("failed to allocate memory for process environment"));
1494         return (NULL);
1495 }
1496 
/*
 * Callback that closes fd unless it is the one descriptor the caller wants
 * kept open; slavefd points at that descriptor.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
        const int keep = *(int *)slavefd;

        if (fd == keep)
                return (0);

        (void) close(fd);
        return (0);
}
1504 
1505 static void
1506 set_cmdchar(char *cmdcharstr)
1507 {
1508         char c;
1509         long lc;
1510 
1511         if ((c = *cmdcharstr) != '\\') {
1512                 cmdchar = c;
1513                 return;
1514         }
1515 
1516         c = cmdcharstr[1];
1517         if (c == '\0' || c == '\\') {
1518                 cmdchar = '\\';
1519                 return;
1520         }
1521 
1522         if (c < '0' || c > '7') {
1523                 zerror(gettext("Unrecognized escape character option %s"),
1524                     cmdcharstr);
1525                 usage();
1526         }
1527 
1528         lc = strtol(cmdcharstr + 1, NULL, 8);
1529         if (lc < 0 || lc > 255) {
1530                 zerror(gettext("Octal escape character '%s' too large"),
1531                     cmdcharstr);
1532                 usage();
1533         }
1534         cmdchar = (char)lc;
1535 }
1536 
1537 static int
1538 setup_utmpx(char *slavename)
1539 {
1540         struct utmpx ut;
1541 
1542         bzero(&ut, sizeof (ut));
1543         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1544         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1545         ut.ut_pid = getpid();
1546         ut.ut_id[0] = 'z';
1547         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1548         ut.ut_type = LOGIN_PROCESS;
1549         (void) time(&ut.ut_tv.tv_sec);
1550 
1551         if (makeutx(&ut) == NULL) {
1552                 zerror(gettext("makeutx failed"));
1553                 return (-1);
1554         }
1555         return (0);
1556 }
1557 
/*
 * Give up the lock file by closing its descriptor.
 */
static void
release_lock_file(int lockfd)
{
        int rc = close(lockfd);

        /* There is nothing useful to do if the close fails. */
        (void) rc;
}
1563 
1564 static int
1565 grab_lock_file(const char *zone_name, int *lockfd)
1566 {
1567         char pathbuf[PATH_MAX];
1568         struct flock flock;
1569 
1570         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1571                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1572                     strerror(errno));
1573                 return (-1);
1574         }
1575         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1576         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1577             ZONES_TMPDIR, zone_name);
1578 
1579         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1580                 zerror(gettext("could not open %s: %s"), pathbuf,
1581                     strerror(errno));
1582                 return (-1);
1583         }
1584         /*
1585          * Lock the file to synchronize with other zoneadmds
1586          */
1587         flock.l_type = F_WRLCK;
1588         flock.l_whence = SEEK_SET;
1589         flock.l_start = (off_t)0;
1590         flock.l_len = (off_t)0;
1591         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1592                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1593                     strerror(errno));
1594                 release_lock_file(*lockfd);
1595                 return (-1);
1596         }
1597         return (Z_OK);
1598 }
1599 
1600 static int
1601 start_zoneadmd(const char *zone_name)
1602 {
1603         pid_t retval;
1604         int pstatus = 0, error = -1, lockfd, doorfd;
1605         struct door_info info;
1606         char doorpath[MAXPATHLEN];
1607 
1608         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1609 
1610         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1611                 return (-1);
1612         /*
1613          * We must do the door check with the lock held.  Otherwise, we
1614          * might race against another zoneadm/zlogin process and wind
1615          * up with two processes trying to start zoneadmd at the same
1616          * time.  zoneadmd will detect this, and fail, but we prefer this
1617          * to be as seamless as is practical, from a user perspective.
1618          */
1619         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1620                 if (errno != ENOENT) {
1621                         zerror("failed to open %s: %s", doorpath,
1622                             strerror(errno));
1623                         goto out;
1624                 }
1625         } else {
1626                 /*
1627                  * Seems to be working ok.
1628                  */
1629                 if (door_info(doorfd, &info) == 0 &&
1630                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1631                         error = 0;
1632                         goto out;
1633                 }
1634         }
1635 
1636         if ((child_pid = fork()) == -1) {
1637                 zperror(gettext("could not fork"));
1638                 goto out;
1639         } else if (child_pid == 0) {
1640                 /* child process */
1641                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1642                     zone_name, NULL);
1643                 zperror(gettext("could not exec zoneadmd"));
1644                 _exit(1);
1645         }
1646 
1647         /* parent process */
1648         do {
1649                 retval = waitpid(child_pid, &pstatus, 0);
1650         } while (retval != child_pid);
1651         if (WIFSIGNALED(pstatus) ||
1652             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1653                 zerror(gettext("could not start %s"), "zoneadmd");
1654                 goto out;
1655         }
1656         error = 0;
1657 out:
1658         release_lock_file(lockfd);
1659         (void) close(doorfd);
1660         return (error);
1661 }
1662 
1663 static int
1664 init_template(void)
1665 {
1666         int fd;
1667         int err = 0;
1668 
1669         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1670         if (fd == -1)
1671                 return (-1);
1672 
1673         /*
1674          * zlogin doesn't do anything with the contract.
1675          * Deliver no events, don't inherit, and allow it to be orphaned.
1676          */
1677         err |= ct_tmpl_set_critical(fd, 0);
1678         err |= ct_tmpl_set_informative(fd, 0);
1679         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1680         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1681         if (err || ct_tmpl_activate(fd)) {
1682                 (void) close(fd);
1683                 return (-1);
1684         }
1685 
1686         return (fd);
1687 }
1688 
1689 static int
1690 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1691     char **new_args, char **new_env)
1692 {
1693         pid_t retval;
1694         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1695         int child_status;
1696         int tmpl_fd;
1697         sigset_t block_cld;
1698 
1699         if ((tmpl_fd = init_template()) == -1) {
1700                 reset_tty();
1701                 zperror(gettext("could not create contract"));
1702                 return (1);
1703         }
1704 
1705         if (pipe(stdin_pipe) != 0) {
1706                 zperror(gettext("could not create STDIN pipe"));
1707                 return (1);
1708         }
1709         /*
1710          * When the user types ^D, we get a zero length message on STDIN.
1711          * We need to echo that down the pipe to send it to the other side;
1712          * but by default, pipes don't propagate zero-length messages.  We
1713          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1714          */
1715         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1716                 zperror(gettext("could not configure STDIN pipe"));
1717                 return (1);
1718 
1719         }
1720         if (pipe(stdout_pipe) != 0) {
1721                 zperror(gettext("could not create STDOUT pipe"));
1722                 return (1);
1723         }
1724         if (pipe(stderr_pipe) != 0) {
1725                 zperror(gettext("could not create STDERR pipe"));
1726                 return (1);
1727         }
1728 
1729         if (pipe(dead_child_pipe) != 0) {
1730                 zperror(gettext("could not create signalling pipe"));
1731                 return (1);
1732         }
1733         close_on_sig = dead_child_pipe[0];
1734 
1735         /*
1736          * If any of the pipe FD's winds up being less than STDERR, then we
1737          * have a mess on our hands-- and we are lacking some of the I/O
1738          * streams we would expect anyway.  So we bail.
1739          */
1740         if (stdin_pipe[0] <= STDERR_FILENO ||
1741             stdin_pipe[1] <= STDERR_FILENO ||
1742             stdout_pipe[0] <= STDERR_FILENO ||
1743             stdout_pipe[1] <= STDERR_FILENO ||
1744             stderr_pipe[0] <= STDERR_FILENO ||
1745             stderr_pipe[1] <= STDERR_FILENO ||
1746             dead_child_pipe[0] <= STDERR_FILENO ||
1747             dead_child_pipe[1] <= STDERR_FILENO) {
1748                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1749                 return (1);
1750         }
1751 
1752         if (prefork_dropprivs() != 0) {
1753                 zperror(gettext("could not allocate privilege set"));
1754                 return (1);
1755         }
1756 
1757         (void) sigset(SIGCLD, sigcld);
1758         (void) sigemptyset(&block_cld);
1759         (void) sigaddset(&block_cld, SIGCLD);
1760         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1761 
1762         if ((child_pid = fork()) == -1) {
1763                 (void) ct_tmpl_clear(tmpl_fd);
1764                 (void) close(tmpl_fd);
1765                 zperror(gettext("could not fork"));
1766                 return (1);
1767         } else if (child_pid == 0) { /* child process */
1768                 (void) ct_tmpl_clear(tmpl_fd);
1769 
1770                 /*
1771                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1772                  */
1773                 (void) close(STDIN_FILENO);
1774                 (void) close(STDOUT_FILENO);
1775                 (void) close(STDERR_FILENO);
1776                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1777                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1778                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1779                 (void) closefrom(STDERR_FILENO + 1);
1780 
1781                 (void) sigset(SIGCLD, SIG_DFL);
1782                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1783                 /*
1784                  * In case any of stdin, stdout or stderr are streams,
1785                  * anchor them to prevent malicious I_POPs.
1786                  */
1787                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1788                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1789                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1790 
1791                 if (zone_enter(zoneid) == -1) {
1792                         zerror(gettext("could not enter zone %s: %s"),
1793                             zonename, strerror(errno));
1794                         _exit(1);
1795                 }
1796 
1797                 /*
1798                  * For non-native zones, tell libc where it can find locale
1799                  * specific getttext() messages.
1800                  */
1801                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1802                         (void) bindtextdomain(TEXT_DOMAIN,
1803                             "/.SUNWnative/usr/lib/locale");
1804                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1805                         (void) bindtextdomain(TEXT_DOMAIN,
1806                             "/native/usr/lib/locale");
1807 
1808                 if (!failsafe)
1809                         new_env = prep_env_noninteractive(user_cmd, new_env);
1810 
1811                 if (new_env == NULL) {
1812                         _exit(1);
1813                 }
1814 
1815                 /*
1816                  * Move into a new process group; the zone_enter will have
1817                  * placed us into zsched's session, and we want to be in
1818                  * a unique process group.
1819                  */
1820                 (void) setpgid(getpid(), getpid());
1821 
1822                 /*
1823                  * The child needs to run as root to
1824                  * execute the su program.
1825                  */
1826                 if (setuid(0) == -1) {
1827                         zperror(gettext("insufficient privilege"));
1828                         return (1);
1829                 }
1830 
1831                 (void) execve(new_args[0], new_args, new_env);
1832                 zperror(gettext("exec failure"));
1833                 _exit(1);
1834         }
1835         /* parent */
1836 
1837         /* close pipe sides written by child */
1838         (void) close(stdout_pipe[1]);
1839         (void) close(stderr_pipe[1]);
1840 
1841         (void) sigset(SIGINT, sig_forward);
1842 
1843         postfork_dropprivs();
1844 
1845         (void) ct_tmpl_clear(tmpl_fd);
1846         (void) close(tmpl_fd);
1847 
1848         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1849         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1850             dead_child_pipe[1], B_TRUE);
1851         do {
1852                 retval = waitpid(child_pid, &child_status, 0);
1853                 if (retval == -1) {
1854                         child_status = 0;
1855                 }
1856         } while (retval != child_pid && errno != ECHILD);
1857 
1858         return (WEXITSTATUS(child_status));
1859 }
1860 
/*
 * Return the login name that corresponds to the real user ID of this
 * process.
 *
 * The name is returned in freshly allocated storage so that it stays
 * valid no matter what password-database lookups happen afterwards; the
 * caller owns the string.  If the name cannot be determined (or copied),
 * an error is printed and the process terminates via _exit(1), so this
 * function never returns NULL.
 */
static char *
get_username(void)
{
        struct passwd *pw;
        char *name;
        size_t len;

        /*
         * Authorizations are checked to restrict access based on the
         * requested operation and zone name.  It is assumed that the
         * program is running with all privileges, but that the real
         * user ID is that of the user or role on whose behalf we are
         * operating.  So we start by getting the username that will be
         * used for subsequent authorization checks.
         */
        if ((pw = getpwuid(getuid())) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }

        /*
         * getpwuid() may return a pointer into a static area that later
         * getpwent()/getpwnam()/getpwuid() calls overwrite, so hand back
         * a private copy rather than pw->pw_name itself.
         */
        len = strlen(pw->pw_name) + 1;
        if ((name = malloc(len)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        (void) memcpy(name, pw->pw_name, len);

        return (name);
}
1883 
1884 static boolean_t
1885 zlog_mode_logging(char *zonename, boolean_t *found)
1886 {
1887         boolean_t lm = B_FALSE;
1888         zone_dochandle_t handle;
1889         struct zone_attrtab attr;
1890 
1891         *found = B_FALSE;
1892         if ((handle = zonecfg_init_handle()) == NULL)
1893                 return (lm);
1894 
1895         if (zonecfg_get_handle(zonename, handle) != Z_OK)
1896                 goto done;
1897 
1898         if (zonecfg_setattrent(handle) != Z_OK)
1899                 goto done;
1900         while (zonecfg_getattrent(handle, &attr) == Z_OK) {
1901                 if (strcmp("zlog-mode", attr.zone_attr_name) == 0) {
1902                         int len = strlen(attr.zone_attr_value);
1903 
1904                         *found = B_TRUE;
1905                         if (strncmp("log", attr.zone_attr_value, 3) == 0 ||
1906                             strncmp("nolog", attr.zone_attr_value, 5) == 0 ||
1907                             (len >= 3 && attr.zone_attr_value[len - 2] == '-'))
1908                                 lm = B_TRUE;
1909                         break;
1910                 }
1911         }
1912         (void) zonecfg_endattrent(handle);
1913 
1914 done:
1915         zonecfg_fini_handle(handle);
1916         return (lm);
1917 }
1918 
1919 int
1920 main(int argc, char **argv)
1921 {
1922         int arg, console = 0, imode = 0;
1923         int estatus = 0;
1924         zoneid_t zoneid;
1925         zone_state_t st;
1926         char *login = "root";
1927         int iflag = 0;
1928         int lflag = 0;
1929         int nflag = 0;
1930         char *zonename = NULL;
1931         char **proc_args = NULL;
1932         char **new_args, **new_env;
1933         sigset_t block_cld;
1934         siginfo_t si;
1935         char devroot[MAXPATHLEN];
1936         char *slavename, slaveshortname[MAXPATHLEN];
1937         priv_set_t *privset;
1938         int tmpl_fd;
1939         char default_brand[MAXNAMELEN];
1940         struct stat sb;
1941         char kernzone[ZONENAME_MAX];
1942         brand_handle_t bh;
1943         char user_cmd[MAXPATHLEN];
1944         char authname[MAXAUTHS];
1945 
1946         (void) setlocale(LC_ALL, "");
1947         (void) textdomain(TEXT_DOMAIN);
1948 
1949         (void) getpname(argv[0]);
1950         username = get_username();
1951 
1952         while ((arg = getopt(argc, argv, "diNnECIR:Se:l:Q")) != EOF) {
1953                 switch (arg) {
1954                 case 'C':
1955                         console = 1;
1956                         break;
1957                 case 'E':
1958                         nocmdchar = 1;
1959                         break;
1960                 case 'I':
1961                         /*
1962                          * interactive mode is just a slight variation on the
1963                          * console mode.
1964                          */
1965                         console = 1;
1966                         imode = 1;
1967                         /* The default is HUP, disconnect on EOF */
1968                         connect_flags ^= ZLOGIN_ZFD_EOF;
1969                         break;
1970                 case 'R':       /* undocumented */
1971                         if (*optarg != '/') {
1972                                 zerror(gettext("root path must be absolute."));
1973                                 exit(2);
1974                         }
1975                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1976                                 zerror(
1977                                     gettext("root path must be a directory."));
1978                                 exit(2);
1979                         }
1980                         zonecfg_set_root(optarg);
1981                         break;
1982                 case 'Q':
1983                         quiet = 1;
1984                         break;
1985                 case 'S':
1986                         failsafe = 1;
1987                         break;
1988                 case 'd':
1989                         connect_flags |= ZLOGIN_DISCONNECT;
1990                         break;
1991                 case 'e':
1992                         set_cmdchar(optarg);
1993                         break;
1994                 case 'i':
1995                         iflag = 1;
1996                         break;
1997                 case 'l':
1998                         login = optarg;
1999                         lflag = 1;
2000                         break;
2001                 case 'N':
2002                         /* NOHUP - do not send EOF */
2003                         connect_flags ^= ZLOGIN_ZFD_EOF;
2004                         break;
2005                 case 'n':
2006                         nflag = 1;
2007                         break;
2008                 default:
2009                         usage();
2010                 }
2011         }
2012 
2013         if (console != 0) {
2014 
2015                 /*
2016                  * The only connect option in console mode is ZLOGIN_DISCONNECT
2017                  */
2018                 if (imode == 0)
2019                         connect_flags &= ZLOGIN_DISCONNECT;
2020 
2021                 if (lflag != 0) {
2022                         zerror(gettext(
2023                             "-l may not be specified for console login"));
2024                         usage();
2025                 }
2026 
2027                 if (nflag != 0) {
2028                         zerror(gettext(
2029                             "-n may not be specified for console login"));
2030                         usage();
2031                 }
2032 
2033                 if (failsafe != 0) {
2034                         zerror(gettext(
2035                             "-S may not be specified for console login"));
2036                         usage();
2037                 }
2038 
2039                 if (zonecfg_in_alt_root()) {
2040                         zerror(gettext(
2041                             "-R may not be specified for console login"));
2042                         exit(2);
2043                 }
2044 
2045         }
2046 
2047         if (iflag != 0 && nflag != 0) {
2048                 zerror(gettext("-i and -n flags are incompatible"));
2049                 usage();
2050         }
2051 
2052         if (failsafe != 0 && lflag != 0) {
2053                 zerror(gettext("-l may not be specified for failsafe login"));
2054                 usage();
2055         }
2056 
2057         if (!console && (connect_flags & ZLOGIN_DISCONNECT) != 0) {
2058                 zerror(gettext(
2059                     "-d may only be specified with console login"));
2060                 usage();
2061         }
2062 
2063         if (imode == 0 && (connect_flags & ZLOGIN_ZFD_EOF) != 0) {
2064                 zerror(gettext("-N may only be specified with -I"));
2065                 usage();
2066         }
2067 
2068         if (optind == (argc - 1)) {
2069                 /*
2070                  * zone name, no process name; this should be an interactive
2071                  * as long as STDIN is really a tty.
2072                  */
2073                 if (nflag != 0) {
2074                         zerror(gettext(
2075                             "-n may not be specified for interactive login"));
2076                         usage();
2077                 }
2078                 if (isatty(STDIN_FILENO))
2079                         interactive = 1;
2080                 zonename = argv[optind];
2081         } else if (optind < (argc - 1)) {
2082                 if (console) {
2083                         zerror(gettext("Commands may not be specified for "
2084                             "console login."));
2085                         usage();
2086                 }
2087                 /* zone name and process name, and possibly some args */
2088                 zonename = argv[optind];
2089                 proc_args = &argv[optind + 1];
2090                 if (iflag && isatty(STDIN_FILENO))
2091                         interactive = 1;
2092         } else {
2093                 usage();
2094         }
2095 
2096         if (getzoneid() != GLOBAL_ZONEID) {
2097                 zerror(gettext("'%s' may only be used from the global zone"),
2098                     pname);
2099                 return (1);
2100         }
2101 
2102         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
2103                 zerror(gettext("'%s' not applicable to the global zone"),
2104                     pname);
2105                 return (1);
2106         }
2107 
2108         if (zone_get_state(zonename, &st) != Z_OK) {
2109                 zerror(gettext("zone '%s' unknown"), zonename);
2110                 return (1);
2111         }
2112 
2113         if (st < ZONE_STATE_INSTALLED) {
2114                 zerror(gettext("cannot login to a zone which is '%s'"),
2115                     zone_state_str(st));
2116                 return (1);
2117         }
2118 
2119         /*
2120          * In both console and non-console cases, we require all privs.
2121          * In the console case, because we may need to startup zoneadmd.
2122          * In the non-console case in order to do zone_enter(2), zonept()
2123          * and other tasks.
2124          */
2125 
2126         if ((privset = priv_allocset()) == NULL) {
2127                 zperror(gettext("priv_allocset failed"));
2128                 return (1);
2129         }
2130 
2131         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2132                 zperror(gettext("getppriv failed"));
2133                 priv_freeset(privset);
2134                 return (1);
2135         }
2136 
2137         if (priv_isfullset(privset) == B_FALSE) {
2138                 zerror(gettext("You lack sufficient privilege to run "
2139                     "this command (all privs required)"));
2140                 priv_freeset(privset);
2141                 return (1);
2142         }
2143         priv_freeset(privset);
2144 
2145         /*
2146          * Check if user is authorized for requested usage of the zone
2147          */
2148 
2149         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
2150             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
2151         if (chkauthattr(authname, username) == 0) {
2152                 if (console) {
2153                         zerror(gettext("%s is not authorized for console "
2154                             "access to  %s zone."),
2155                             username, zonename);
2156                         return (1);
2157                 } else {
2158                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
2159                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
2160                         if (failsafe || !interactive) {
2161                                 zerror(gettext("%s is not authorized for  "
2162                                     "failsafe or non-interactive login "
2163                                     "to  %s zone."), username, zonename);
2164                                 return (1);
2165                         } else if (chkauthattr(authname, username) == 0) {
2166                                 zerror(gettext("%s is not authorized "
2167                                     " to login to %s zone."),
2168                                     username, zonename);
2169                                 return (1);
2170                         }
2171                 }
2172         } else {
2173                 forced_login = B_TRUE;
2174         }
2175 
2176         /*
2177          * The console (or standalong interactive mode) is a separate case from
2178          * the rest of the code; handle it first.
2179          */
2180         if (console) {
2181                 int gz_stderr_fd = -1;
2182                 int retry;
2183                 boolean_t set_raw = B_TRUE;
2184 
2185                 if (imode) {
2186                         boolean_t has_zfd_config;
2187 
2188                         if (zlog_mode_logging(zonename, &has_zfd_config))
2189                                 set_raw = B_FALSE;
2190 
2191                         /*
2192                          * Asked for standalone interactive mode but the
2193                          * zlog-mode attribute is not configured on the zone.
2194                          */
2195                         if (!has_zfd_config) {
2196                                 zerror(gettext("'%s' is not configured on "
2197                                     "the zone"), "zlog-mode");
2198                                 return (1);
2199                         }
2200                 }
2201 
2202                 /*
2203                  * Ensure that zoneadmd for this zone is running.
2204                  */
2205                 if (start_zoneadmd(zonename) == -1)
2206                         return (1);
2207 
2208                 /*
2209                  * Make contact with zoneadmd.
2210                  *
2211                  * Handshake with the control socket first. We handle retries
2212                  * here since the relevant thread in zoneadmd might not have
2213                  * finished setting up yet.
2214                  */
2215                 for (retry = 0; retry < MAX_RETRY; retry++) {
2216                         masterfd = connect_zone_sock(zonename,
2217                             (imode ? "server_ctl" : "console_sock"), B_FALSE);
2218                         if (masterfd != -1)
2219                                 break;
2220                         (void) sleep(1);
2221                 }
2222 
2223                 if (retry == MAX_RETRY) {
2224                         zerror(gettext("unable to connect for %d seconds"),
2225                             MAX_RETRY);
2226                         return (1);
2227                 }
2228 
2229                 if (handshake_zone_sock(masterfd, connect_flags) != 0) {
2230                         (void) close(masterfd);
2231                         return (1);
2232                 }
2233 
2234                 if (imode) {
2235                         ctlfd = masterfd;
2236 
2237                         /* Now open the io-related sockets */
2238                         masterfd = connect_zone_sock(zonename, "server_out",
2239                             B_TRUE);
2240                         gz_stderr_fd = connect_zone_sock(zonename,
2241                             "server_err", B_TRUE);
2242                         if (masterfd == -1 || gz_stderr_fd == -1) {
2243                                 (void) close(ctlfd);
2244                                 (void) close(masterfd);
2245                                 (void) close(gz_stderr_fd);
2246                                 return (1);
2247                         }
2248                 }
2249 
2250                 if (!quiet) {
2251                         if (imode)
2252                                 (void) printf(gettext("[Connected to zone '%s' "
2253                                     "interactively]\n"), zonename);
2254                         else
2255                                 (void) printf(gettext("[Connected to zone '%s' "
2256                                     "console]\n"), zonename);
2257                 }
2258 
2259                 if (set_raw && set_tty_rawmode(STDIN_FILENO) == -1) {
2260                         reset_tty();
2261                         zperror(gettext("failed to set stdin pty to raw mode"));
2262                         return (1);
2263                 }
2264 
2265                 (void) sigset(SIGWINCH, sigwinch);
2266                 (void) sigwinch(0);
2267 
2268                 if (imode) {
2269                         /* Allow EOF mode toggling via SIGUSR1 */
2270                         (void) sigset(SIGUSR1, sigusr1);
2271                 }
2272 
2273                 /*
2274                  * Run the I/O loop until we get disconnected.
2275                  */
2276                 doio(masterfd, -1, masterfd, gz_stderr_fd, -1, B_FALSE);
2277                 reset_tty();
2278                 if (!quiet) {
2279                         if (imode)
2280                                 (void) printf(gettext("\n[Interactive "
2281                                     "connection to zone '%s' closed]\n"),
2282                                     zonename);
2283                         else
2284                                 (void) printf(gettext("\n[Connection to zone "
2285                                     "'%s' console closed]\n"), zonename);
2286                 }
2287 
2288                 return (0);
2289         }
2290 
2291         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
2292                 zerror(gettext("login allowed only to running zones "
2293                     "(%s is '%s')."), zonename, zone_state_str(st));
2294                 return (1);
2295         }
2296 
2297         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
2298         if (zonecfg_in_alt_root()) {
2299                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
2300 
2301                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2302                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2303                         zerror(gettext("cannot find scratch zone %s"),
2304                             zonename);
2305                         if (fp != NULL)
2306                                 zonecfg_close_scratch(fp);
2307                         return (1);
2308                 }
2309                 zonecfg_close_scratch(fp);
2310         }
2311 
2312         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2313                 zerror(gettext("failed to get zoneid for zone '%s'"),
2314                     zonename);
2315                 return (1);
2316         }
2317 
2318         /*
2319          * We need the zone root path only if we are setting up a pty.
2320          */
2321         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2322                 zerror(gettext("could not get dev path for zone %s"),
2323                     zonename);
2324                 return (1);
2325         }
2326 
2327         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2328                 zerror(gettext("could not get brand for zone %s"), zonename);
2329                 return (1);
2330         }
2331         /*
2332          * In the alternate root environment, the only supported
2333          * operations are mount and unmount.  In this case, just treat
2334          * the zone as native if it is cluster.  Cluster zones can be
2335          * native for the purpose of LU or upgrade, and the cluster
2336          * brand may not exist in the miniroot (such as in net install
2337          * upgrade).
2338          */
2339         if (zonecfg_default_brand(default_brand,
2340             sizeof (default_brand)) != Z_OK) {
2341                 zerror(gettext("unable to determine default brand"));
2342                 return (1);
2343         }
2344         if (zonecfg_in_alt_root() &&
2345             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2346                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2347         }
2348 
2349         if ((bh = brand_open(zonebrand)) == NULL) {
2350                 zerror(gettext("could not open brand for zone %s"), zonename);
2351                 return (1);
2352         }
2353 
2354         /*
2355          * The 'interactive' parameter (-i option) indicates that we're running
2356          * a command interactively. In this case we skip prep_args so that we
2357          * don't prepend the 'su root -c' preamble to the command invocation
2358          * since the 'su' command typically will execute a setpgrp which will
2359          * disassociate the actual command from the controlling terminal that
2360          * we (zlogin) setup.
2361          */
2362         if (!iflag) {
2363                 if ((new_args = prep_args(bh, zonename, login, proc_args))
2364                     == NULL) {
2365                         zperror(gettext("could not assemble new arguments"));
2366                         brand_close(bh);
2367                         return (1);
2368                 }
2369         }
2370 
2371         /*
2372          * Get the brand specific user_cmd.  This command is used to get
2373          * a passwd(4) entry for login.
2374          */
2375         if (!interactive && !failsafe) {
2376                 if (zone_get_user_cmd(bh, login, user_cmd,
2377                     sizeof (user_cmd)) == NULL) {
2378                         zerror(gettext("could not get user_cmd for zone %s"),
2379                             zonename);
2380                         brand_close(bh);
2381                         return (1);
2382                 }
2383         }
2384         brand_close(bh);
2385 
2386         if ((new_env = prep_env()) == NULL) {
2387                 zperror(gettext("could not assemble new environment"));
2388                 return (1);
2389         }
2390 
2391         if (!interactive) {
2392                 if (nflag) {
2393                         int nfd;
2394 
2395                         if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2396                                 zperror(gettext("failed to open null device"));
2397                                 return (1);
2398                         }
2399                         if (nfd != STDIN_FILENO) {
2400                                 if (dup2(nfd, STDIN_FILENO) < 0) {
2401                                         zperror(gettext(
2402                                             "failed to dup2 null device"));
2403                                         return (1);
2404                                 }
2405                                 (void) close(nfd);
2406                         }
2407                         /* /dev/null is now standard input */
2408                 }
2409                 return (noninteractive_login(zonename, user_cmd, zoneid,
2410                     new_args, new_env));
2411         }
2412 
2413         if (zonecfg_in_alt_root()) {
2414                 zerror(gettext("cannot use interactive login with scratch "
2415                     "zone"));
2416                 return (1);
2417         }
2418 
2419         /*
2420          * Things are more complex in interactive mode; we get the
2421          * master side of the pty, then place the user's terminal into
2422          * raw mode.
2423          */
2424         if (get_master_pty() == -1) {
2425                 zerror(gettext("could not setup master pty device"));
2426                 return (1);
2427         }
2428 
2429         /*
2430          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2431          */
2432         if ((slavename = ptsname(masterfd)) == NULL) {
2433                 zperror(gettext("failed to get name for pseudo-tty"));
2434                 return (1);
2435         }
2436         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2437                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2438                     sizeof (slaveshortname));
2439         else
2440                 (void) strlcpy(slaveshortname, slavename,
2441                     sizeof (slaveshortname));
2442 
2443         if (!quiet)
2444                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2445                     zonename, slaveshortname);
2446 
2447         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2448                 reset_tty();
2449                 zperror(gettext("failed to set stdin pty to raw mode"));
2450                 return (1);
2451         }
2452 
2453         if (prefork_dropprivs() != 0) {
2454                 reset_tty();
2455                 zperror(gettext("could not allocate privilege set"));
2456                 return (1);
2457         }
2458 
2459         /*
2460          * We must mask SIGCLD until after we have coped with the fork
2461          * sufficiently to deal with it; otherwise we can race and receive the
2462          * signal before child_pid has been initialized (yes, this really
2463          * happens).
2464          */
2465         (void) sigset(SIGCLD, sigcld);
2466         (void) sigemptyset(&block_cld);
2467         (void) sigaddset(&block_cld, SIGCLD);
2468         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2469 
2470         /*
2471          * We activate the contract template at the last minute to
2472          * avoid intermediate functions that could be using fork(2)
2473          * internally.
2474          */
2475         if ((tmpl_fd = init_template()) == -1) {
2476                 reset_tty();
2477                 zperror(gettext("could not create contract"));
2478                 return (1);
2479         }
2480 
2481         if ((child_pid = fork()) == -1) {
2482                 (void) ct_tmpl_clear(tmpl_fd);
2483                 reset_tty();
2484                 zperror(gettext("could not fork"));
2485                 return (1);
2486         } else if (child_pid == 0) { /* child process */
2487                 int slavefd, newslave;
2488 
2489                 (void) ct_tmpl_clear(tmpl_fd);
2490                 (void) close(tmpl_fd);
2491 
2492                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2493 
2494                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2495                         return (1);
2496 
2497                 /*
2498                  * Close all fds except for the slave pty.
2499                  */
2500                 (void) fdwalk(close_func, &slavefd);
2501 
2502                 /*
2503                  * Temporarily dup slavefd to stderr; that way if we have
2504                  * to print out that zone_enter failed, the output will
2505                  * have somewhere to go.
2506                  */
2507                 if (slavefd != STDERR_FILENO)
2508                         (void) dup2(slavefd, STDERR_FILENO);
2509 
2510                 if (zone_enter(zoneid) == -1) {
2511                         zerror(gettext("could not enter zone %s: %s"),
2512                             zonename, strerror(errno));
2513                         return (1);
2514                 }
2515 
2516                 /* Note: we're now inside the zone, can't use gettext anymore */
2517 
2518                 if (slavefd != STDERR_FILENO)
2519                         (void) close(STDERR_FILENO);
2520 
2521                 /*
2522                  * We take pains to get this process into a new process
2523                  * group, and subsequently a new session.  In this way,
2524                  * we'll have a session which doesn't yet have a controlling
2525                  * terminal.  When we open the slave, it will become the
2526                  * controlling terminal; no PIDs concerning pgrps or sids
2527                  * will leak inappropriately into the zone.
2528                  */
2529                 (void) setpgrp();
2530 
2531                 /*
2532                  * We need the slave pty to be referenced from the zone's
2533                  * /dev in order to ensure that the devt's, etc are all
2534                  * correct.  Otherwise we break ttyname and the like.
2535                  */
2536                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2537                         (void) close(slavefd);
2538                         return (1);
2539                 }
2540                 (void) close(slavefd);
2541                 slavefd = newslave;
2542 
2543                 /*
2544                  * dup the slave to the various FDs, so that when the
2545                  * spawned process does a write/read it maps to the slave
2546                  * pty.
2547                  */
2548                 (void) dup2(slavefd, STDIN_FILENO);
2549                 (void) dup2(slavefd, STDOUT_FILENO);
2550                 (void) dup2(slavefd, STDERR_FILENO);
2551                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2552                     slavefd != STDERR_FILENO) {
2553                         (void) close(slavefd);
2554                 }
2555 
2556                 /*
2557                  * In failsafe mode, we don't use login(1), so don't try
2558                  * setting up a utmpx entry.
2559                  *
2560                  * A branded zone may have very different utmpx semantics.
2561                  * At the moment, we only have two brand types:
2562                  * Illumos-like (native, sn1) and Linux.  In the Illumos
2563                  * case, we know exactly how to do the necessary utmpx
2564                  * setup.  Fortunately for us, the Linux /bin/login is
2565                  * prepared to deal with a non-initialized utmpx entry, so
2566                  * we can simply skip it.  If future brands don't fall into
2567                  * either category, we'll have to add a per-brand utmpx
2568                  * setup hook.
2569                  */
2570                 if (!failsafe && (strcmp(zonebrand, "lx") != 0))
2571                         if (setup_utmpx(slaveshortname) == -1)
2572                                 return (1);
2573 
2574                 /*
2575                  * The child needs to run as root to
2576                  * execute the brand's login program.
2577                  */
2578                 if (setuid(0) == -1) {
2579                         zperror("insufficient privilege");
2580                         return (1);
2581                 }
2582 
2583                 if (iflag) {
2584                         (void) execve(proc_args[0], proc_args, new_env);
2585                 } else {
2586                         (void) execve(new_args[0], new_args, new_env);
2587                 }
2588                 zperror("exec failure");
2589                 return (ENOEXEC);
2590         }
2591 
2592         (void) ct_tmpl_clear(tmpl_fd);
2593         (void) close(tmpl_fd);
2594 
2595         /*
2596          * The rest is only for the parent process.
2597          */
2598         (void) sigset(SIGWINCH, sigwinch);
2599 
2600         postfork_dropprivs();
2601 
2602         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2603         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2604 
2605         reset_tty();
2606         if (!quiet)
2607                 (void) fprintf(stderr,
2608                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2609                     zonename, slaveshortname);
2610 
2611         if (pollerr != 0) {
2612                 (void) fprintf(stderr, gettext("Error: connection closed due "
2613                     "to unexpected pollevents=0x%x.\n"), pollerr);
2614                 return (EPIPE);
2615         }
2616 
2617         /* reap child and get its status */
2618         if (waitid(P_PID, child_pid, &si, WEXITED | WNOHANG) == -1) {
2619                 estatus = errno;
2620         } else if (si.si_pid == 0) {
2621                 estatus = ECHILD;
2622         } else if (si.si_code == CLD_EXITED) {
2623                 estatus = si.si_status;
2624         } else {
2625                 estatus = ECONNABORTED;
2626         }
2627 
2628         return (estatus);
2629 }