1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015, Joyent, Inc.
  29  * Copyright 2023 Oxide Computer Company
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/uio.h>
  34 #include <sys/param.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/cred.h>
  37 #include <sys/policy.h>
  38 #include <sys/debug.h>
  39 #include <sys/errno.h>
  40 #include <sys/file.h>
  41 #include <sys/inline.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/brand.h>
  45 #include <sys/regset.h>
  46 #include <sys/sysmacros.h>
  47 #include <sys/systm.h>
  48 #include <sys/vfs.h>
  49 #include <sys/vnode.h>
  50 #include <sys/signal.h>
  51 #include <sys/auxv.h>
  52 #include <sys/user.h>
  53 #include <sys/class.h>
  54 #include <sys/fault.h>
  55 #include <sys/syscall.h>
  56 #include <sys/procfs.h>
  57 #include <sys/zone.h>
  58 #include <sys/copyops.h>
  59 #include <sys/schedctl.h>
  60 #include <vm/as.h>
  61 #include <vm/seg.h>
  62 #include <fs/proc/prdata.h>
  63 #include <sys/contract/process_impl.h>
  64 #include <sys/stdalign.h>
  65 
  66 static  void    pr_settrace(proc_t *, sigset_t *);
  67 static  int     pr_setfpregs(prnode_t *, prfpregset_t *);
  68 static  int     pr_setxregs(prnode_t *, prxregset_t *);
  69 static  int     pr_setvaddr(prnode_t *, caddr_t);
  70 static  int     pr_clearsig(prnode_t *);
  71 static  int     pr_clearflt(prnode_t *);
  72 static  int     pr_watch(prnode_t *, prwatch_t *, int *);
  73 static  int     pr_agent(prnode_t *, prgregset_t, int *);
  74 static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
  75 static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
  76 static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
  77 static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
  78 static  void    pauselwps(proc_t *);
  79 static  void    unpauselwps(proc_t *);
  80 
  81 /*
  82  * This union represents the size of commands that are generally fixed size in
  83  * /proc. There are some commands that are variable size because the actual data
  84  * is structured. Of things in the latter category, some of these are the same
  85  * across all architectures (e.g. prcred_t, prpriv_t) and some vary and are
  86  * opaque (e.g. the prxregset_t).
  87  */
  88 typedef union {
  89         long            sig;            /* PCKILL, PCUNKILL */
  90         long            nice;           /* PCNICE */
  91         long            timeo;          /* PCTWSTOP */
  92         ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
  93         caddr_t         vaddr;          /* PCSVADDR */
  94         siginfo_t       siginfo;        /* PCSSIG */
  95         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
  96         fltset_t        fltset;         /* PCSFAULT */
  97         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
  98         prgregset_t     prgregset;      /* PCSREG, PCAGENT */
  99         prfpregset_t    prfpregset;     /* PCSFPREG */
 100         prwatch_t       prwatch;        /* PCWATCH */
 101         priovec_t       priovec;        /* PCREAD, PCWRITE */
 102         prcred_t        prcred;         /* PCSCRED */
 103         prpriv_t        prpriv;         /* PCSPRIV */
 104         long            przoneid;       /* PCSZONE */
 105 } arg_t;
 106 
 107 static boolean_t
 108 prwritectl_pcscredx_sizef(const void *datap, size_t *sizep)
 109 {
 110         const prcred_t *cred = datap;
 111 
 112         if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
 113                 return (B_FALSE);
 114         }
 115 
 116         if (cred->pr_ngroups == 0) {
 117                 *sizep = 0;
 118         } else {
 119                 *sizep = (cred->pr_ngroups - 1) * sizeof (gid_t);
 120         }
 121         return (B_TRUE);
 122 }
 123 
 124 static boolean_t
 125 prwritectl_pcspriv_sizef(const void *datap, size_t *sizep)
 126 {
 127         const prpriv_t *priv = datap;
 128         *sizep = priv_prgetprivsize(priv) - sizeof (prpriv_t);
 129         return (B_TRUE);
 130 }
 131 
 132 /*
 133  * This structure represents a single /proc write command that we support and
 134  * metadata about how to ensure we have sufficient data for it. To determine the
 135  * data that we need to read, this combines information from three different
 136  * sources for a given named command in 'pcs_cmd'. The main goal is to first
 137  * make sure we have the right minimum amount of information so we can read and
 138  * validate the data around variable length structures.
 139  *
 140  *   o Most commands have a fixed static size. This is represented in the
 141  *     pcs_size member. This also is used to represent the base structure size
 142  *     in the case of entries like PCSCREDX.
 143  *
 144  *   o Other commands have an unknown minimum size to determine how much data
 145  *     there is and they use the pcs_minf() function to determine the right
 146  *     value. This is often unknown at compile time because it is say a
 147  *     machdep or ISA based feature (ala PCSXREGS) and we'd rather not #ifdef
 148  *     this code to death. This may be skipped and is for most things. The value
 149  *     it returns is added to the static value.
 150  *
 151  *   o The final piece is the pcs_sizef() function pointer which determines the
 152  *     total required size for this. It is given a pointer that has at least
 153  *     pcs_size and pcs_minf() bytes. This is used to determine the total
 154  *     expected size of the structure. Callers must not dereference data beyond
 155  *     what they've indicated previously. This should only return exra bytes
 156  *     that are required beyond what was already indicated between the two
 157  *     functions.
 158  *
 159  * In all cases, the core prwritectl() logic will determine if there is
 160  * sufficient step along the way for each of these to proceed.
 161  */
 162 typedef struct proc_control_info {
 163         long    pcs_cmd;
 164         size_t  pcs_size;
 165         boolean_t (*pcs_minf)(size_t *);
 166         boolean_t (*pcs_sizef)(const void *, size_t *);
 167 } proc_control_info_t;
 168 
 169 static const proc_control_info_t proc_ctl_info[] = {
 170         { PCNULL, 0, NULL, NULL },
 171         { PCSTOP, 0, NULL, NULL },
 172         { PCDSTOP, 0, NULL, NULL },
 173         { PCWSTOP, 0, NULL, NULL },
 174         { PCCSIG, 0, NULL, NULL },
 175         { PCCFAULT, 0, NULL, NULL },
 176         { PCSSIG, sizeof (siginfo_t), NULL, NULL },
 177         { PCTWSTOP, sizeof (long), NULL, NULL },
 178         { PCKILL, sizeof (long), NULL, NULL },
 179         { PCUNKILL, sizeof (long), NULL, NULL },
 180         { PCNICE, sizeof (long), NULL, NULL },
 181         { PCRUN, sizeof (ulong_t), NULL, NULL },
 182         { PCSET, sizeof (ulong_t), NULL, NULL },
 183         { PCUNSET, sizeof (ulong_t), NULL, NULL },
 184         { PCSTRACE, sizeof (sigset_t), NULL, NULL },
 185         { PCSHOLD, sizeof (sigset_t), NULL, NULL },
 186         { PCSFAULT, sizeof (fltset_t), NULL, NULL },
 187         { PCSENTRY, sizeof (sysset_t), NULL, NULL },
 188         { PCSEXIT, sizeof (sysset_t), NULL, NULL },
 189         { PCSREG, sizeof (prgregset_t), NULL, NULL },
 190         { PCAGENT, sizeof (prgregset_t), NULL, NULL },
 191         { PCSFPREG, sizeof (prfpregset_t), NULL, NULL },
 192         { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
 193         { PCWATCH, sizeof (prwatch_t), NULL },
 194         { PCREAD, sizeof (priovec_t), NULL, NULL },
 195         { PCWRITE, sizeof (priovec_t), NULL, NULL },
 196         { PCSCRED, sizeof (prcred_t), NULL, NULL },
 197         { PCSCREDX, sizeof (prcred_t), NULL, prwritectl_pcscredx_sizef },
 198         { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
 199         { PCSZONE, sizeof (long), NULL },
 200 };
 201 
 202 /*
 203  * We need a default buffer that we're going to allocate when we need memory to
 204  * read control operations. This is on average large enough to hold multiple
 205  * control operations. We leave this as a smaller value on debug builds just
 206  * to exercise our reallocation logic.
 207  */
 208 #ifdef  DEBUG
 209 #define PROC_CTL_DEFSIZE        32
 210 #else
 211 #define PROC_CTL_DEFSIZE        1024
 212 #endif
 213 
 214 /*
 215  * This structure is used to track all of the information that we have around a
 216  * prwritectl call. This is used to reduce function parameters and make state
 217  * clear.
 218  */
 219 typedef struct {
 220         void    *prwc_buf;
 221         size_t  prwc_buflen;
 222         size_t  prwc_curvalid;
 223         uio_t   *prwc_uiop;
 224         prnode_t *prwc_pnp;
 225         boolean_t prwc_locked;
 226         boolean_t prwc_need32;
 227         void    *prwc_buf32;
 228 } prwritectl_t;
 229 
 230 /*
 231  * Attempt to read in at least needed data. If we need to read in data, then we
 232  * will try to fill in as much data as required.
 233  */
 234 static int
 235 prwritectl_readin(prwritectl_t *prwc, size_t needed)
 236 {
 237         int ret;
 238         size_t toread;
 239         void *start;
 240 
 241         /*
 242          * If we have as much data as we need then we're good to go.
 243          */
 244         if (prwc->prwc_curvalid > needed) {
 245                 ASSERT3U(prwc->prwc_buflen, >=, prwc->prwc_curvalid);
 246                 ASSERT3U(prwc->prwc_buflen, >=, needed);
 247                 return (0);
 248         }
 249 
 250         /*
 251          * We don't have all of our data. We must make sure of several things:
 252          *
 253          *   1. That there actually is enough data in the uio_t for what we
 254          *      need, considering what we've already read.
 255          *   2. If the process is locked, at this point, we want to unlock it
 256          *      before we deal with any I/O or memory allocation. Otherwise we
 257          *      can wreak havoc with p_lock / paging.
 258          *   3. We need to make sure that our buffer is large enough to actually
 259          *      fit it all.
 260          *   4. Only at that point can we actually perform the read.
 261          */
 262         if (needed - prwc->prwc_curvalid > prwc->prwc_uiop->uio_resid) {
 263                 return (EINVAL);
 264         }
 265 
 266         if (prwc->prwc_locked) {
 267                 prunlock(prwc->prwc_pnp);
 268                 prwc->prwc_locked = B_FALSE;
 269         }
 270 
 271         if (needed > prwc->prwc_buflen) {
 272                 size_t new_len = P2ROUNDUP(needed, PROC_CTL_DEFSIZE);
 273                 prwc->prwc_buf = kmem_rezalloc(prwc->prwc_buf,
 274                     prwc->prwc_buflen, new_len, KM_SLEEP);
 275                 if (prwc->prwc_need32) {
 276                         prwc->prwc_buf32 = kmem_rezalloc(prwc->prwc_buf32,
 277                             prwc->prwc_buflen, new_len, KM_SLEEP);
 278                 }
 279                 prwc->prwc_buflen = new_len;
 280         }
 281 
 282         toread = MIN(prwc->prwc_buflen - prwc->prwc_curvalid,
 283             prwc->prwc_uiop->uio_resid);
 284         ASSERT3U(toread, >=, needed - prwc->prwc_curvalid);
 285         start = (void *)((uintptr_t)prwc->prwc_buf + prwc->prwc_curvalid);
 286         if ((ret = uiomove(start, toread, UIO_WRITE, prwc->prwc_uiop)) != 0) {
 287                 return (ret);
 288         }
 289 
 290         prwc->prwc_curvalid += toread;
 291         return (0);
 292 }
 293 
 294 static const proc_control_info_t *
 295 prwritectl_cmd_identify(const prwritectl_t *prwc,
 296     const proc_control_info_t *info, size_t ninfo, size_t cmdsize)
 297 {
 298         long cmd;
 299 
 300         ASSERT(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
 301         if (cmdsize == 4) {
 302                 cmd = (long)*(int32_t *)prwc->prwc_buf;
 303         } else {
 304                 cmd = *(long *)prwc->prwc_buf;
 305         }
 306 
 307 
 308         for (size_t i = 0; i < ninfo; i++) {
 309                 if (info[i].pcs_cmd == cmd) {
 310                         return (&info[i]);
 311                 }
 312         }
 313 
 314         return (NULL);
 315 }
 316 
 317 /*
 318  * Control operations (lots).
 319  *
 320  * Users can submit one or more commands to us in the uio_t. They are required
 321  * to always be complete messages. The first one that fails will cause all
 322  * subsequent things to fail. Processing this can be a little tricky as the
 323  * actual data size that may be required is variable, not all structures are
 324  * fixed sizes and some vary based on the instructing set (e.g. x86 vs.
 325  * something else).
 326  *
 327  * The way that we handle process locking deserves some consideration. Prior to
 328  * the colonization of prwritectl and the support for dynamic sizing of data,
 329  * the logic would try to read in a large chunk of data and keep a process
 330  * locked throughout that period and then unlock it before reading more data. As
 331  * such, we mimic that logically and basically lock it before executing the
 332  * first (or any subsequent) command and then only unlock it either when we're
 333  * done entirely or we need to allocate memory or read from the process.
 334  *
 335  * This function is a common implementation for both the ILP32 and LP64 entry
 336  * points as they are mostly the same except for the sizing and control function
 337  * we call.
 338  */
 339 int
 340 prwritectl_common(vnode_t *vp, uio_t *uiop, cred_t *cr,
 341     const proc_control_info_t *proc_info, size_t ninfo, size_t cmdsize,
 342     int (*pr_controlf)(long, void *, prnode_t *, cred_t *))
 343 {
 344         int ret;
 345         prwritectl_t prwc;
 346 
 347         VERIFY(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
 348 
 349         bzero(&prwc, sizeof (prwc));
 350         prwc.prwc_pnp = VTOP(vp);
 351         prwc.prwc_uiop = uiop;
 352         prwc.prwc_need32 = cmdsize == sizeof (int32_t);
 353 
 354         /*
 355          * We may have multiple commands to read and want to try to minimize the
 356          * amount of reading that we do. Our callers expect us to have a
 357          * contiguous buffer for a command's actual implementation. However, we
 358          * must have at least a single long worth of data, otherwise it's not
 359          * worth continuing.
 360          */
 361         while (uiop->uio_resid > 0 || prwc.prwc_curvalid > 0) {
 362                 const proc_control_info_t *proc_cmd;
 363                 void *data;
 364 
 365                 /*
 366                  * Check if we have enough data to identify a command. If not,
 367                  * we read as much as we can in one gulp.
 368                  */
 369                 if ((ret = prwritectl_readin(&prwc, cmdsize)) != 0) {
 370                         goto out;
 371                 }
 372 
 373                 /*
 374                  * Identify the command and figure out how how much data we
 375                  * should have read in the kernel. Some commands have a variable
 376                  * length and we need to make sure the minimum is met before
 377                  * asking how much there is in general. Most things know what
 378                  * the minimum length is and this pcs_minf() is not implemented.
 379                  * However things that are ISA-specific require us to ask that
 380                  * first.
 381                  *
 382                  * We also must be aware that there may not actually be enough
 383                  * data present in the uio_t.
 384                  */
 385                 if ((proc_cmd = prwritectl_cmd_identify(&prwc, proc_info,
 386                     ninfo, cmdsize)) == NULL) {
 387                         ret = EINVAL;
 388                         goto out;
 389                 }
 390 
 391                 size_t needed_data = cmdsize + proc_cmd->pcs_size;
 392                 if (proc_cmd->pcs_minf != NULL) {
 393                         size_t min;
 394 
 395                         if (!proc_cmd->pcs_minf(&min)) {
 396                                 ret = EINVAL;
 397                                 goto out;
 398                         }
 399 
 400                         needed_data += min;
 401                 }
 402 
 403                 if (proc_cmd->pcs_sizef != NULL) {
 404                         size_t extra;
 405 
 406                         /*
 407                          * Make sure we have the minimum amount of data that
 408                          * they asked us to between the static and minf
 409                          * function.
 410                          */
 411                         if ((ret = prwritectl_readin(&prwc, needed_data)) !=
 412                             0) {
 413                                 goto out;
 414                         }
 415 
 416                         VERIFY3U(prwc.prwc_curvalid, >, cmdsize);
 417                         data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
 418                         if (!proc_cmd->pcs_sizef(data, &extra)) {
 419                                 ret = EINVAL;
 420                                 goto out;
 421                         }
 422 
 423                         needed_data += extra;
 424                 }
 425 
 426                 /*
 427                  * Now that we know how much data we're supposed to have,
 428                  * finally ensure we have the total amount we need.
 429                  */
 430                 if ((ret = prwritectl_readin(&prwc, needed_data)) != 0) {
 431                         goto out;
 432                 }
 433 
 434                 /*
 435                  * /proc has traditionally assumed control writes come in
 436                  * multiples of a long. This is 4 bytes for ILP32 and 8 bytes
 437                  * for LP64. When calculating the required size for a structure,
 438                  * it would always round that up to the next long. However, the
 439                  * exact combination of circumstances changes with the
 440                  * introduction of the 64-bit kernel. For 64-bit processes we
 441                  * round up when the current command we're processing isn't the
 442                  * last one.
 443                  *
 444                  * Because of our tracking structures and caching we need to
 445                  * look beyond the uio_t to make this determination. In
 446                  * particular, the uio_t can have a zero resid, but we may still
 447                  * have additional data to read as indicated by prwc_curvalid
 448                  * exceeded the current command size. In the end, we must check
 449                  * both of these cases.
 450                  */
 451                 if ((needed_data % cmdsize) != 0) {
 452                         if (cmdsize == sizeof (int32_t) ||
 453                             prwc.prwc_curvalid > needed_data ||
 454                             prwc.prwc_uiop->uio_resid > 0) {
 455                                 needed_data = P2ROUNDUP(needed_data,
 456                                     cmdsize);
 457                                 if ((ret = prwritectl_readin(&prwc,
 458                                     needed_data)) != 0) {
 459                                         goto out;
 460                                 }
 461                         }
 462                 }
 463 
 464                 if (!prwc.prwc_locked) {
 465                         ret = prlock(prwc.prwc_pnp, ZNO);
 466                         if (ret != 0) {
 467                                 goto out;
 468                         }
 469                         prwc.prwc_locked = B_TRUE;
 470                 }
 471 
 472                 /*
 473                  * Run our actual command. When there is an error, then the
 474                  * underlying pr_control call will have unlocked the prnode_t
 475                  * on our behalf. pr_control can return -1, which is a special
 476                  * error indicating a timeout occurred. In such a case the node
 477                  * is unlocked; however, that we are supposed to continue
 478                  * processing commands regardless.
 479                  *
 480                  * Finally, we must deal with with one actual wrinkle. The LP64
 481                  * based logic always guarantees that we have data that is
 482                  * 8-byte aligned. However, the ILP32 logic is 4-byte aligned
 483                  * and the rest of the /proc code assumes it can always
 484                  * dereference it. If we're not aligned, we have to bcopy it to
 485                  * a temporary buffer.
 486                  */
 487                 data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
 488 #ifdef  DEBUG
 489                 if (cmdsize == sizeof (long)) {
 490                         VERIFY0((uintptr_t)data % alignof (long));
 491                 }
 492 #endif
 493                 if (prwc.prwc_need32 && ((uintptr_t)data % alignof (long)) !=
 494                     0 && needed_data > cmdsize) {
 495                         bcopy(data, prwc.prwc_buf32, needed_data - cmdsize);
 496                         data = prwc.prwc_buf32;
 497                 }
 498                 ret = pr_controlf(proc_cmd->pcs_cmd, data, prwc.prwc_pnp, cr);
 499                 if (ret != 0) {
 500                         prwc.prwc_locked = B_FALSE;
 501                         if (ret > 0) {
 502                                 goto out;
 503                         }
 504                 }
 505 
 506                 /*
 507                  * Finally, now that we have processed this command, we need to
 508                  * move on. To make our life simple, we basically shift all the
 509                  * data in our buffer over to indicate it's been consumed. While
 510                  * a little wasteful, this simplifies buffer management and
 511                  * guarantees that command processing uses a semi-sanitized
 512                  * state. Visually, this is the following transformation:
 513                  *
 514                  *  0                   20              prwc.prwc_curvalid
 515                  *   +------------------+----------------+
 516                  *   |   needed_data    | remaining_data |
 517                  *   +------------------+----------------+
 518                  *
 519                  * In the above example we are shifting all the data over by 20,
 520                  * so remaining data starts at 0. This leaves us needed_data
 521                  * bytes to clean up from what was valid.
 522                  */
 523                 if (prwc.prwc_buf32 != NULL) {
 524                         bzero(prwc.prwc_buf32, needed_data - cmdsize);
 525                 }
 526 
 527                 if (prwc.prwc_curvalid > needed_data) {
 528                         size_t save_size = prwc.prwc_curvalid - needed_data;
 529                         void *first_save = (void *)((uintptr_t)prwc.prwc_buf +
 530                             needed_data);
 531                         memmove(prwc.prwc_buf, first_save, save_size);
 532                         void *first_zero = (void *)((uintptr_t)prwc.prwc_buf +
 533                             save_size);
 534                         bzero(first_zero, needed_data);
 535                 } else {
 536                         bzero(prwc.prwc_buf, prwc.prwc_curvalid);
 537                 }
 538                 prwc.prwc_curvalid -= needed_data;
 539         }
 540 
 541         /*
 542          * We've managed to successfully process everything. We can actually say
 543          * this was successful now.
 544          */
 545         ret = 0;
 546 
 547 out:
 548         if (prwc.prwc_locked) {
 549                 prunlock(prwc.prwc_pnp);
 550                 prwc.prwc_locked = B_FALSE;
 551         }
 552 
 553         if (prwc.prwc_buf != NULL) {
 554                 kmem_free(prwc.prwc_buf, prwc.prwc_buflen);
 555         }
 556 
 557         if (prwc.prwc_buf32 != NULL) {
 558                 VERIFY(prwc.prwc_need32);
 559                 kmem_free(prwc.prwc_buf32, prwc.prwc_buflen);
 560         }
 561 
 562         return (ret);
 563 }
 564 
 565 static int
 566 pr_control(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
 567 {
 568         prcommon_t *pcp;
 569         proc_t *p;
 570         int unlocked;
 571         int error = 0;
 572         arg_t *argp = generic;
 573 
 574         if (cmd == PCNULL)
 575                 return (0);
 576 
 577         pcp = pnp->pr_common;
 578         p = pcp->prc_proc;
 579         ASSERT(p != NULL);
 580 
 581         /* System processes defy control. */
 582         if (p->p_flag & SSYS) {
 583                 prunlock(pnp);
 584                 return (EBUSY);
 585         }
 586 
 587         switch (cmd) {
 588 
 589         default:
 590                 error = EINVAL;
 591                 break;
 592 
 593         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 594         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 595         case PCWSTOP:   /* wait for process or lwp to stop */
 596         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 597                 {
 598                         time_t timeo;
 599 
 600                         /*
 601                          * Can't apply to a system process.
 602                          */
 603                         if (p->p_as == &kas) {
 604                                 error = EBUSY;
 605                                 break;
 606                         }
 607 
 608                         if (cmd == PCSTOP || cmd == PCDSTOP)
 609                                 pr_stop(pnp);
 610 
 611                         if (cmd == PCDSTOP)
 612                                 break;
 613 
 614                         /*
 615                          * If an lwp is waiting for itself or its process,
 616                          * don't wait. The stopped lwp would never see the
 617                          * fact that it is stopped.
 618                          */
 619                         if ((pcp->prc_flags & PRC_LWP)?
 620                             (pcp->prc_thread == curthread) : (p == curproc)) {
 621                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 622                                         error = EBUSY;
 623                                 break;
 624                         }
 625 
 626                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 627                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 628                                 return (error);
 629 
 630                         break;
 631                 }
 632 
 633         case PCRUN:     /* make lwp or process runnable */
 634                 error = pr_setrun(pnp, argp->flags);
 635                 break;
 636 
 637         case PCSTRACE:  /* set signal trace mask */
 638                 pr_settrace(p,  &argp->sigset);
 639                 break;
 640 
 641         case PCSSIG:    /* set current signal */
 642                 error = pr_setsig(pnp, &argp->siginfo);
 643                 if (argp->siginfo.si_signo == SIGKILL && error == 0) {
 644                         prunlock(pnp);
 645                         pr_wait_die(pnp);
 646                         return (-1);
 647                 }
 648                 break;
 649 
 650         case PCKILL:    /* send signal */
 651                 error = pr_kill(pnp, (int)argp->sig, cr);
 652                 if (error == 0 && argp->sig == SIGKILL) {
 653                         prunlock(pnp);
 654                         pr_wait_die(pnp);
 655                         return (-1);
 656                 }
 657                 break;
 658 
 659         case PCUNKILL:  /* delete a pending signal */
 660                 error = pr_unkill(pnp, (int)argp->sig);
 661                 break;
 662 
 663         case PCNICE:    /* set nice priority */
 664                 error = pr_nice(p, (int)argp->nice, cr);
 665                 break;
 666 
 667         case PCSENTRY:  /* set syscall entry bit mask */
 668         case PCSEXIT:   /* set syscall exit bit mask */
 669                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 670                 break;
 671 
 672         case PCSET:     /* set process flags */
 673                 error = pr_set(p, argp->flags);
 674                 break;
 675 
 676         case PCUNSET:   /* unset process flags */
 677                 error = pr_unset(p, argp->flags);
 678                 break;
 679 
 680         case PCSREG:    /* set general registers */
 681                 {
 682                         kthread_t *t = pr_thread(pnp);
 683 
 684                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 685                                 thread_unlock(t);
 686                                 error = EBUSY;
 687                         } else {
 688                                 thread_unlock(t);
 689                                 mutex_exit(&p->p_lock);
 690                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 691                                 mutex_enter(&p->p_lock);
 692                         }
 693                         break;
 694                 }
 695 
 696         case PCSFPREG:  /* set floating-point registers */
 697                 error = pr_setfpregs(pnp, &argp->prfpregset);
 698                 break;
 699 
 700         case PCSXREG:   /* set extra registers */
 701                 error = pr_setxregs(pnp, (prxregset_t *)argp);
 702                 break;
 703 
 704         case PCSVADDR:  /* set virtual address at which to resume */
 705                 error = pr_setvaddr(pnp, argp->vaddr);
 706                 break;
 707 
 708         case PCSHOLD:   /* set signal-hold mask */
 709                 pr_sethold(pnp, &argp->sigset);
 710                 break;
 711 
 712         case PCSFAULT:  /* set mask of traced faults */
 713                 pr_setfault(p, &argp->fltset);
 714                 break;
 715 
 716         case PCCSIG:    /* clear current signal */
 717                 error = pr_clearsig(pnp);
 718                 break;
 719 
 720         case PCCFAULT:  /* clear current fault */
 721                 error = pr_clearflt(pnp);
 722                 break;
 723 
 724         case PCWATCH:   /* set or clear watched areas */
 725                 error = pr_watch(pnp, &argp->prwatch, &unlocked);
 726                 if (error && unlocked)
 727                         return (error);
 728                 break;
 729 
 730         case PCAGENT:   /* create the /proc agent lwp in the target process */
 731                 error = pr_agent(pnp, argp->prgregset, &unlocked);
 732                 if (error && unlocked)
 733                         return (error);
 734                 break;
 735 
 736         case PCREAD:    /* read from the address space */
 737                 error = pr_rdwr(p, UIO_READ, &argp->priovec);
 738                 break;
 739 
 740         case PCWRITE:   /* write to the address space */
 741                 error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
 742                 break;
 743 
 744         case PCSCRED:   /* set the process credentials */
 745         case PCSCREDX:
 746                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 747                 break;
 748 
 749         case PCSPRIV:   /* set the process privileges */
 750                 error = pr_spriv(p, &argp->prpriv, cr);
 751                 break;
 752         case PCSZONE:   /* set the process's zoneid credentials */
 753                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 754                 break;
 755         }
 756 
 757         if (error)
 758                 prunlock(pnp);
 759         return (error);
 760 }
 761 
 762 int
 763 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
 764 {
 765         return (prwritectl_common(vp, uiop, cr, proc_ctl_info,
 766             ARRAY_SIZE(proc_ctl_info), sizeof (long), pr_control));
 767 }
 768 
 769 #ifdef _SYSCALL32_IMPL
 770 
/*
 * ILP32 view of the operand that accompanies a control code.
 * pr_control32() treats its generic operand pointer as an arg32_t and reads
 * the member that corresponds to the command being executed; the comment on
 * each member names the command(s) that use it.
 */
typedef union {
        int32_t         sig;            /* PCKILL, PCUNKILL */
        int32_t         nice;           /* PCNICE */
        int32_t         timeo;          /* PCTWSTOP */
        uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
        caddr32_t       vaddr;          /* PCSVADDR */
        siginfo32_t     siginfo;        /* PCSSIG */
        sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
        fltset_t        fltset;         /* PCSFAULT */
        sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
        prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
        prfpregset32_t  prfpregset;     /* PCSFPREG */
        prwatch32_t     prwatch;        /* PCWATCH */
        priovec32_t     priovec;        /* PCREAD, PCWRITE */
        prcred32_t      prcred;         /* PCSCRED */
        prpriv_t        prpriv;         /* PCSPRIV */
        int32_t         przoneid;       /* PCSZONE */
} arg32_t;

/* Forward declaration; called from the PCSFPREG case of pr_control32(). */
static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 791 
 792 static boolean_t
 793 prwritectl_pcscredx32_sizef(const void *datap, size_t *sizep)
 794 {
 795         const prcred32_t *cred = datap;
 796 
 797         if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
 798                 return (B_FALSE);
 799         }
 800 
 801         if (cred->pr_ngroups == 0) {
 802                 *sizep = 0;
 803         } else {
 804                 *sizep = (cred->pr_ngroups - 1) * sizeof (gid32_t);
 805         }
 806         return (B_TRUE);
 807 }
 808 
 809 /*
 810  * When dealing with ILP32 code, we are not at a point where we can assume
 811  * 64-bit aligned data. Any functions that are operating here must be aware of
 812  * that.
 813  */
 814 static const proc_control_info_t proc_ctl_info32[] = {
 815         { PCNULL, 0, NULL, NULL },
 816         { PCSTOP, 0, NULL, NULL },
 817         { PCDSTOP, 0, NULL, NULL },
 818         { PCWSTOP, 0, NULL, NULL },
 819         { PCCSIG, 0, NULL, NULL },
 820         { PCCFAULT, 0, NULL, NULL },
 821         { PCSSIG, sizeof (siginfo32_t), NULL, NULL },
 822         { PCTWSTOP, sizeof (int32_t), NULL, NULL },
 823         { PCKILL, sizeof (int32_t), NULL, NULL },
 824         { PCUNKILL, sizeof (int32_t), NULL, NULL },
 825         { PCNICE, sizeof (int32_t), NULL, NULL },
 826         { PCRUN, sizeof (uint32_t), NULL, NULL },
 827         { PCSET, sizeof (uint32_t), NULL, NULL },
 828         { PCUNSET, sizeof (uint32_t), NULL, NULL },
 829         { PCSVADDR, sizeof (caddr32_t), NULL, NULL },
 830         { PCSTRACE, sizeof (sigset_t), NULL, NULL },
 831         { PCSHOLD, sizeof (sigset_t), NULL, NULL },
 832         { PCSFAULT, sizeof (fltset_t), NULL, NULL },
 833         { PCSENTRY, sizeof (sysset_t), NULL, NULL },
 834         { PCSEXIT, sizeof (sysset_t), NULL, NULL },
 835         { PCSREG, sizeof (prgregset32_t), NULL, NULL },
 836         { PCAGENT, sizeof (prgregset32_t), NULL, NULL },
 837         { PCSFPREG, sizeof (prfpregset32_t), NULL, NULL },
 838         { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
 839         { PCWATCH, sizeof (prwatch32_t), NULL },
 840         { PCREAD, sizeof (priovec32_t), NULL, NULL },
 841         { PCWRITE, sizeof (priovec32_t), NULL, NULL },
 842         { PCSCRED, sizeof (prcred32_t), NULL, NULL },
 843         { PCSCREDX, sizeof (prcred32_t), NULL, prwritectl_pcscredx32_sizef },
 844         { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
 845         { PCSZONE, sizeof (long), NULL },
 846 };
 847 
/*
 * Carry out one control operation issued through the ILP32 control
 * interface (this is the routine prwritectl32() passes to
 * prwritectl_common()).
 *
 *      cmd      control code (PC*)
 *      generic  operand for cmd, interpreted as an arg32_t
 *      pnp      prnode the request was made on
 *      cr       credentials passed on to the helpers that take them
 *
 * Where the shared helper needs a native operand (siginfo, general
 * registers, watched areas, I/O vectors) the 32-bit operand is converted
 * here first.  PCSSIG, PCSREG, PCSFPREG, PCSXREG, PCSVADDR, PCWATCH,
 * PCAGENT, PCREAD and PCWRITE fail with EOVERFLOW when
 * PROCESS_NOT_32BIT(p) is true.
 *
 * Returns 0 on success (PCNULL returns 0 immediately).  On failure the
 * error is returned after prunlock(pnp), except where the error is returned
 * directly: failures of pr_wait_stop(), and failures of pr_watch() or
 * pr_agent() that set 'unlocked'.  After successfully delivering SIGKILL
 * (PCSSIG or PCKILL) the node is unlocked, pr_wait_die() is called and -1
 * is returned.
 */
static int
pr_control32(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
{
        prcommon_t *pcp;
        proc_t *p;
        int unlocked;
        int error = 0;
        arg32_t *argp = generic;

        if (cmd == PCNULL)
                return (0);

        pcp = pnp->pr_common;
        p = pcp->prc_proc;
        ASSERT(p != NULL);

        /* No control operation is permitted on an SSYS process. */
        if (p->p_flag & SSYS) {
                prunlock(pnp);
                return (EBUSY);
        }

        switch (cmd) {

        default:
                error = EINVAL;
                break;

        case PCSTOP:    /* direct process or lwp to stop and wait for stop */
        case PCDSTOP:   /* direct process or lwp to stop, don't wait */
        case PCWSTOP:   /* wait for process or lwp to stop */
        case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
                {
                        time_t timeo;

                        /*
                         * Can't apply to a system process.
                         */
                        if (p->p_as == &kas) {
                                error = EBUSY;
                                break;
                        }

                        if (cmd == PCSTOP || cmd == PCDSTOP)
                                pr_stop(pnp);

                        if (cmd == PCDSTOP)
                                break;

                        /*
                         * If an lwp is waiting for itself or its process,
                         * don't wait. The lwp will never see the fact that
                         * itself is stopped.
                         */
                        if ((pcp->prc_flags & PRC_LWP)?
                            (pcp->prc_thread == curthread) : (p == curproc)) {
                                if (cmd == PCWSTOP || cmd == PCTWSTOP)
                                        error = EBUSY;
                                break;
                        }

                        /*
                         * Only PCTWSTOP carries a timeout operand.  An error
                         * from pr_wait_stop() is returned without calling
                         * prunlock() here.
                         */
                        timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
                        if ((error = pr_wait_stop(pnp, timeo)) != 0)
                                return (error);

                        break;
                }

        case PCRUN:     /* make lwp or process runnable */
                error = pr_setrun(pnp, (ulong_t)argp->flags);
                break;

        case PCSTRACE:  /* set signal trace mask */
                pr_settrace(p,  &argp->sigset);
                break;

        case PCSSIG:    /* set current signal */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else {
                        int sig = (int)argp->siginfo.si_signo;
                        siginfo_t siginfo;

                        /* Build a native siginfo from the 32-bit operand. */
                        bzero(&siginfo, sizeof (siginfo));
                        siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
                        error = pr_setsig(pnp, &siginfo);
                        if (sig == SIGKILL && error == 0) {
                                prunlock(pnp);
                                pr_wait_die(pnp);
                                return (-1);
                        }
                }
                break;

        case PCKILL:    /* send signal */
                error = pr_kill(pnp, (int)argp->sig, cr);
                if (error == 0 && argp->sig == SIGKILL) {
                        prunlock(pnp);
                        pr_wait_die(pnp);
                        return (-1);
                }
                break;

        case PCUNKILL:  /* delete a pending signal */
                error = pr_unkill(pnp, (int)argp->sig);
                break;

        case PCNICE:    /* set nice priority */
                error = pr_nice(p, (int)argp->nice, cr);
                break;

        case PCSENTRY:  /* set syscall entry bit mask */
        case PCSEXIT:   /* set syscall exit bit mask */
                pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
                break;

        case PCSET:     /* set process flags */
                error = pr_set(p, (long)argp->flags);
                break;

        case PCUNSET:   /* unset process flags */
                error = pr_unset(p, (long)argp->flags);
                break;

        case PCSREG:    /* set general registers */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else {
                        kthread_t *t = pr_thread(pnp);

                        if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
                                thread_unlock(t);
                                error = EBUSY;
                        } else {
                                prgregset_t prgregset;
                                klwp_t *lwp = ttolwp(t);

                                /*
                                 * p_lock is dropped around the register
                                 * conversion and update.
                                 */
                                thread_unlock(t);
                                mutex_exit(&p->p_lock);
                                prgregset_32ton(lwp, argp->prgregset,
                                    prgregset);
                                prsetprregs(lwp, prgregset, 0);
                                mutex_enter(&p->p_lock);
                        }
                }
                break;

        case PCSFPREG:  /* set floating-point registers */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else
                        error = pr_setfpregs32(pnp, &argp->prfpregset);
                break;

        case PCSXREG:   /* set extra registers */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else
                        error = pr_setxregs(pnp, (prxregset_t *)argp);
                break;

        case PCSVADDR:  /* set virtual address at which to resume */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else
                        error = pr_setvaddr(pnp,
                            (caddr_t)(uintptr_t)argp->vaddr);
                break;

        case PCSHOLD:   /* set signal-hold mask */
                pr_sethold(pnp, &argp->sigset);
                break;

        case PCSFAULT:  /* set mask of traced faults */
                pr_setfault(p, &argp->fltset);
                break;

        case PCCSIG:    /* clear current signal */
                error = pr_clearsig(pnp);
                break;

        case PCCFAULT:  /* clear current fault */
                error = pr_clearflt(pnp);
                break;

        case PCWATCH:   /* set or clear watched areas */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else {
                        prwatch_t prwatch;

                        /* Widen the 32-bit request member by member. */
                        prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
                        prwatch.pr_size = argp->prwatch.pr_size;
                        prwatch.pr_wflags = argp->prwatch.pr_wflags;
                        prwatch.pr_pad = argp->prwatch.pr_pad;
                        error = pr_watch(pnp, &prwatch, &unlocked);
                        if (error && unlocked)
                                return (error);
                }
                break;

        case PCAGENT:   /* create the /proc agent lwp in the target process */
                if (PROCESS_NOT_32BIT(p))
                        error = EOVERFLOW;
                else {
                        prgregset_t prgregset;
                        kthread_t *t = pr_thread(pnp);
                        klwp_t *lwp = ttolwp(t);
                        thread_unlock(t);
                        /* Convert the registers with p_lock dropped. */
                        mutex_exit(&p->p_lock);
                        prgregset_32ton(lwp, argp->prgregset, prgregset);
                        mutex_enter(&p->p_lock);
                        error = pr_agent(pnp, prgregset, &unlocked);
                        if (error && unlocked)
                                return (error);
                }
                break;

        case PCREAD:    /* read from the address space */
        case PCWRITE:   /* write to the address space */
                if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
                        error = EOVERFLOW;
                else {
                        enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
                        priovec_t priovec;

                        /*
                         * The offset is passed through uint32_t so that it
                         * is zero-extended rather than sign-extended.
                         */
                        priovec.pio_base =
                            (void *)(uintptr_t)argp->priovec.pio_base;
                        priovec.pio_len = (size_t)argp->priovec.pio_len;
                        priovec.pio_offset = (off_t)
                            (uint32_t)argp->priovec.pio_offset;
                        error = pr_rdwr(p, rw, &priovec);
                }
                break;

        case PCSCRED:   /* set the process credentials */
        case PCSCREDX:
                {
                        /*
                         * All the fields in these structures are exactly the
                         * same and so the structures are compatible.  In case
                         * this ever changes, we catch this with the ASSERT
                         * below.
                         */
                        prcred_t *prcred = (prcred_t *)&argp->prcred;

#ifndef __lint
                        ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
#endif

                        error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
                        break;
                }

        case PCSPRIV:   /* set the process privileges */
                error = pr_spriv(p, &argp->prpriv, cr);
                break;

        case PCSZONE:   /* set the process's zoneid */
                error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
                break;
        }

        /* Every error that reaches this point is returned unlocked. */
        if (error)
                prunlock(pnp);
        return (error);
}
1114 
1115 int
1116 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
1117 {
1118         return (prwritectl_common(vp, uiop, cr, proc_ctl_info32,
1119             ARRAY_SIZE(proc_ctl_info32), sizeof (int32_t), pr_control32));
1120 }
1121 #endif  /* _SYSCALL32_IMPL */
1122 
1123 /*
1124  * Return the specific or chosen thread/lwp for a control operation.
1125  * Returns with the thread locked via thread_lock(t).
1126  */
1127 kthread_t *
1128 pr_thread(prnode_t *pnp)
1129 {
1130         prcommon_t *pcp = pnp->pr_common;
1131         kthread_t *t;
1132 
1133         if (pcp->prc_flags & PRC_LWP) {
1134                 t = pcp->prc_thread;
1135                 ASSERT(t != NULL);
1136                 thread_lock(t);
1137         } else {
1138                 proc_t *p = pcp->prc_proc;
1139                 t = prchoose(p);        /* returns locked thread */
1140                 ASSERT(t != NULL);
1141         }
1142 
1143         return (t);
1144 }
1145 
/*
 * Direct the process or lwp to stop.
 *
 * For an lwp node (PRC_LWP set in prc_flags) only prc_thread is directed to
 * stop; otherwise every thread on the process's p_tlist is visited.  This
 * routine only posts the request and pokes the threads; it does not wait for
 * them to stop (see pr_wait_stop()).
 */
void
pr_stop(prnode_t *pnp)
{
        prcommon_t *pcp = pnp->pr_common;
        proc_t *p = pcp->prc_proc;
        kthread_t *t;
        vnode_t *vp;

        /*
         * If already stopped, do nothing; otherwise flag
         * it to be stopped the next time it tries to run.
         * If sleeping at interruptible priority, set it
         * running so it will stop within cv_wait_sig().
         *
         * Take care to cooperate with jobcontrol: if an lwp
         * is stopped due to the default action of a jobcontrol
         * stop signal, flag it to be stopped the next time it
         * starts due to a SIGCONT signal.
         */
        if (pcp->prc_flags & PRC_LWP)
                t = pcp->prc_thread;
        else
                t = p->p_tlist;
        ASSERT(t != NULL);

        do {
                int notify;

                /* set when this pass marks the thread virtually stopped */
                notify = 0;
                thread_lock(t);
                if (!ISTOPPED(t)) {
                        t->t_proc_flag |= TP_PRSTOP;
                        t->t_sig_check = 1;  /* do ISSIG */
                }

                /* Move the thread from wait queue to run queue */
                if (ISWAITING(t))
                        setrun_locked(t);

                if (ISWAKEABLE(t)) {
                        if (t->t_wchan0 == NULL)
                                setrun_locked(t);
                        else if (!VSTOPPED(t)) {
                                /*
                                 * Mark it virtually stopped.
                                 */
                                t->t_proc_flag |= TP_PRVSTOP;
                                notify = 1;
                        }
                }
                /*
                 * force the thread into the kernel
                 * if it is not already there.
                 */
                prpokethread(t);
                thread_unlock(t);
                /*
                 * The notification is issued only after the thread lock has
                 * been dropped, and only if the lwp directory entry has a
                 * trace vnode attached.
                 */
                if (notify &&
                    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
                        prnotify(vp);
                /* An lwp node covers just the one thread. */
                if (pcp->prc_flags & PRC_LWP)
                        break;
        } while ((t = t->t_forw) != p->p_tlist);

        /*
         * We do this just in case the thread we asked
         * to stop is in holdlwps() (called from cfork()).
         */
        cv_broadcast(&p->p_holdlwps);
}
1218 
/*
 * Sleep until the lwp stops, but cooperate with
 * jobcontrol:  Don't wake up if the lwp is stopped
 * due to the default action of a jobcontrol stop signal.
 * If this is the process file descriptor, sleep
 * until all of the process's lwps stop.
 *
 *      pnp    prnode being waited on
 *      timeo  timeout in milliseconds; a value <= 0 means wait without
 *             a timeout (no absolute time is handed to pr_wait())
 *
 * Returns 0 once the stop condition holds.  Otherwise returns the non-zero
 * value produced by pr_wait() (-1 denotes a timeout) or by prlock().  Every
 * non-zero return happens after prunlock(pnp) has been called for the wait,
 * so callers must not unlock again on error.
 */
int
pr_wait_stop(prnode_t *pnp, time_t timeo)
{
        prcommon_t *pcp = pnp->pr_common;
        proc_t *p = pcp->prc_proc;
        timestruc_t rqtime;
        timestruc_t *rqtp = NULL;
        int timecheck = 0;
        kthread_t *t;
        int error;

        if (timeo > 0) {     /* millisecond timeout */
                /*
                 * Determine the precise future time of the requested timeout.
                 */
                timestruc_t now;

                timecheck = timechanged;
                gethrestime(&now);
                rqtp = &rqtime;
                /* split milliseconds into seconds and nanoseconds */
                rqtp->tv_sec = timeo / MILLISEC;
                rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
                timespecadd(rqtp, &now);
        }

        if (pcp->prc_flags & PRC_LWP) {  /* lwp file descriptor */
                t = pcp->prc_thread;
                ASSERT(t != NULL);
                thread_lock(t);
                while (!ISTOPPED(t) && !VSTOPPED(t)) {
                        /*
                         * Take prc_mutex before dropping the prnode lock;
                         * pr_wait() sleeps on prc_wait and releases
                         * prc_mutex before it returns.
                         */
                        thread_unlock(t);
                        mutex_enter(&pcp->prc_mutex);
                        prunlock(pnp);
                        error = pr_wait(pcp, rqtp, timecheck);
                        if (error)      /* -1 is timeout */
                                return (error);
                        if ((error = prlock(pnp, ZNO)) != 0)
                                return (error);
                        ASSERT(p == pcp->prc_proc);
                        ASSERT(t == pcp->prc_thread);
                        thread_lock(t);
                }
                thread_unlock(t);
        } else {                        /* process file descriptor */
                t = prchoose(p);        /* returns locked thread */
                ASSERT(t != NULL);
                ASSERT(MUTEX_HELD(&p->p_lock));
                /*
                 * Keep waiting while the representative thread is neither
                 * stopped, virtually stopped nor suspended, or while
                 * SEXITLWPS is set on the process.
                 */
                while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
                    (p->p_flag & SEXITLWPS)) {
                        thread_unlock(t);
                        mutex_enter(&pcp->prc_mutex);
                        prunlock(pnp);
                        error = pr_wait(pcp, rqtp, timecheck);
                        if (error)      /* -1 is timeout */
                                return (error);
                        if ((error = prlock(pnp, ZNO)) != 0)
                                return (error);
                        ASSERT(p == pcp->prc_proc);
                        /* the representative may differ after the sleep */
                        t = prchoose(p);        /* returns locked t */
                        ASSERT(t != NULL);
                }
                thread_unlock(t);
        }

        ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
            t != NULL && t->t_state != TS_ZOMB);

        return (0);
}
1295 
1296 int
1297 pr_setrun(prnode_t *pnp, ulong_t flags)
1298 {
1299         prcommon_t *pcp = pnp->pr_common;
1300         proc_t *p = pcp->prc_proc;
1301         kthread_t *t;
1302         klwp_t *lwp;
1303 
1304         /*
1305          * Cannot set an lwp running if it is not stopped.
1306          * Also, no lwp other than the /proc agent lwp can
1307          * be set running so long as the /proc agent lwp exists.
1308          */
1309         t = pr_thread(pnp);     /* returns locked thread */
1310         if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1311             !(t->t_proc_flag & TP_PRSTOP)) ||
1312             (p->p_agenttp != NULL &&
1313             (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1314                 thread_unlock(t);
1315                 return (EBUSY);
1316         }
1317         thread_unlock(t);
1318         if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1319                 return (EINVAL);
1320         lwp = ttolwp(t);
1321         if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1322                 /*
1323                  * Discard current siginfo_t, if any.
1324                  */
1325                 lwp->lwp_cursig = 0;
1326                 lwp->lwp_extsig = 0;
1327                 if (lwp->lwp_curinfo) {
1328                         siginfofree(lwp->lwp_curinfo);
1329                         lwp->lwp_curinfo = NULL;
1330                 }
1331         }
1332         if (flags & PRCFAULT)
1333                 lwp->lwp_curflt = 0;
1334         /*
1335          * We can't hold p->p_lock when we touch the lwp's registers.
1336          * It may be swapped out and we will get a page fault.
1337          */
1338         if (flags & PRSTEP) {
1339                 mutex_exit(&p->p_lock);
1340                 prstep(lwp, 0);
1341                 mutex_enter(&p->p_lock);
1342         }
1343         if (flags & PRSTOP) {
1344                 t->t_proc_flag |= TP_PRSTOP;
1345                 t->t_sig_check = 1;  /* do ISSIG */
1346         }
1347         if (flags & PRSABORT)
1348                 lwp->lwp_sysabort = 1;
1349         thread_lock(t);
1350         if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1351                 /*
1352                  * Here, we are dealing with a single lwp.
1353                  */
1354                 if (ISTOPPED(t)) {
1355                         t->t_schedflag |= TS_PSTART;
1356                         t->t_dtrace_stop = 0;
1357                         setrun_locked(t);
1358                 } else if (flags & PRSABORT) {
1359                         t->t_proc_flag &=
1360                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1361                         setrun_locked(t);
1362                 } else if (!(flags & PRSTOP)) {
1363                         t->t_proc_flag &=
1364                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1365                 }
1366                 thread_unlock(t);
1367         } else {
1368                 /*
1369                  * Here, we are dealing with the whole process.
1370                  */
1371                 if (ISTOPPED(t)) {
1372                         /*
1373                          * The representative lwp is stopped on an event
1374                          * of interest.  We demote it to PR_REQUESTED and
1375                          * choose another representative lwp.  If the new
1376                          * representative lwp is not stopped on an event of
1377                          * interest (other than PR_REQUESTED), we set the
1378                          * whole process running, else we leave the process
1379                          * stopped showing the next event of interest.
1380                          */
1381                         kthread_t *tx = NULL;
1382 
1383                         if (!(flags & PRSABORT) &&
1384                             t->t_whystop == PR_SYSENTRY &&
1385                             t->t_whatstop == SYS_lwp_exit)
1386                                 tx = t;         /* remember the exiting lwp */
1387                         t->t_whystop = PR_REQUESTED;
1388                         t->t_whatstop = 0;
1389                         thread_unlock(t);
1390                         t = prchoose(p);        /* returns locked t */
1391                         ASSERT(ISTOPPED(t) || VSTOPPED(t));
1392                         if (VSTOPPED(t) ||
1393                             t->t_whystop == PR_REQUESTED) {
1394                                 thread_unlock(t);
1395                                 allsetrun(p);
1396                         } else {
1397                                 thread_unlock(t);
1398                                 /*
1399                                  * As a special case, if the old representative
1400                                  * lwp was stopped on entry to _lwp_exit()
1401                                  * (and we are not aborting the system call),
1402                                  * we set the old representative lwp running.
1403                                  * We do this so that the next process stop
1404                                  * will find the exiting lwp gone.
1405                                  */
1406                                 if (tx != NULL) {
1407                                         thread_lock(tx);
1408                                         tx->t_schedflag |= TS_PSTART;
1409                                         t->t_dtrace_stop = 0;
1410                                         setrun_locked(tx);
1411                                         thread_unlock(tx);
1412                                 }
1413                         }
1414                 } else {
1415                         /*
1416                          * No event of interest; set all of the lwps running.
1417                          */
1418                         if (flags & PRSABORT) {
1419                                 t->t_proc_flag &=
1420                                     ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1421                                 setrun_locked(t);
1422                         }
1423                         thread_unlock(t);
1424                         allsetrun(p);
1425                 }
1426         }
1427         return (0);
1428 }
1429 
1430 /*
1431  * Wait until process/lwp stops or until timer expires.
1432  * Return EINTR for an interruption, -1 for timeout, else 0.
1433  */
1434 int
1435 pr_wait(prcommon_t *pcp,        /* prcommon referring to process/lwp */
1436     timestruc_t *ts,            /* absolute time of timeout, if any */
1437     int timecheck)
1438 {
1439         int rval;
1440 
1441         ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1442         rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1443         mutex_exit(&pcp->prc_mutex);
1444         switch (rval) {
1445         case 0:
1446                 return (EINTR);
1447         case -1:
1448                 return (-1);
1449         default:
1450                 return (0);
1451         }
1452 }
1453 
1454 /*
1455  * Make all threads in the process runnable.
1456  */
1457 void
1458 allsetrun(proc_t *p)
1459 {
1460         kthread_t *t;
1461 
1462         ASSERT(MUTEX_HELD(&p->p_lock));
1463 
1464         if ((t = p->p_tlist) != NULL) {
1465                 do {
1466                         thread_lock(t);
1467                         ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1468                         t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1469                         if (ISTOPPED(t)) {
1470                                 t->t_schedflag |= TS_PSTART;
1471                                 t->t_dtrace_stop = 0;
1472                                 setrun_locked(t);
1473                         }
1474                         thread_unlock(t);
1475                 } while ((t = t->t_forw) != p->p_tlist);
1476         }
1477 }
1478 
1479 /*
1480  * Wait for the process to die.
1481  * We do this after sending SIGKILL because we know it will
1482  * die soon and we want subsequent operations to return ENOENT.
1483  */
1484 void
1485 pr_wait_die(prnode_t *pnp)
1486 {
1487         proc_t *p;
1488 
1489         mutex_enter(&pidlock);
1490         while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1491                 if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1492                         break;
1493         }
1494         mutex_exit(&pidlock);
1495 }
1496 
1497 static void
1498 pr_settrace(proc_t *p, sigset_t *sp)
1499 {
1500         prdelset(sp, SIGKILL);
1501         prassignset(&p->p_sigmask, sp);
1502         if (!sigisempty(&p->p_sigmask))
1503                 p->p_proc_flag |= P_PR_TRACE;
1504         else if (prisempty(&p->p_fltmask)) {
1505                 user_t *up = PTOU(p);
1506                 if (up->u_systrap == 0)
1507                         p->p_proc_flag &= ~P_PR_TRACE;
1508         }
1509 }
1510 
1511 int
1512 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1513 {
1514         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1515         int sig = sip->si_signo;
1516         prcommon_t *pcp = pnp->pr_common;
1517         proc_t *p = pcp->prc_proc;
1518         kthread_t *t;
1519         klwp_t *lwp;
1520         int error = 0;
1521 
1522         t = pr_thread(pnp);     /* returns locked thread */
1523         thread_unlock(t);
1524         lwp = ttolwp(t);
1525         if (sig < 0 || sig >= nsig)
1526                 /* Zero allowed here */
1527                 error = EINVAL;
1528         else if (lwp->lwp_cursig == SIGKILL)
1529                 /* "can't happen", but just in case */
1530                 error = EBUSY;
1531         else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1532                 lwp->lwp_extsig = 0;
1533                 /*
1534                  * Discard current siginfo_t, if any.
1535                  */
1536                 if (lwp->lwp_curinfo) {
1537                         siginfofree(lwp->lwp_curinfo);
1538                         lwp->lwp_curinfo = NULL;
1539                 }
1540         } else {
1541                 kthread_t *tx;
1542                 sigqueue_t *sqp;
1543 
1544                 /* drop p_lock to do kmem_alloc(KM_SLEEP) */
1545                 mutex_exit(&p->p_lock);
1546                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1547                 mutex_enter(&p->p_lock);
1548 
1549                 if (lwp->lwp_curinfo == NULL)
1550                         lwp->lwp_curinfo = sqp;
1551                 else
1552                         kmem_free(sqp, sizeof (sigqueue_t));
1553                 /*
1554                  * Copy contents of info to current siginfo_t.
1555                  */
1556                 bcopy(sip, &lwp->lwp_curinfo->sq_info,
1557                     sizeof (lwp->lwp_curinfo->sq_info));
1558                 /*
1559                  * Prevent contents published by si_zoneid-unaware /proc
1560                  * consumers from being incorrectly filtered.  Because
1561                  * an uninitialized si_zoneid is the same as
1562                  * GLOBAL_ZONEID, this means that you can't pr_setsig a
1563                  * process in a non-global zone with a siginfo which
1564                  * appears to come from the global zone.
1565                  */
1566                 if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1567                         lwp->lwp_curinfo->sq_info.si_zoneid =
1568                             p->p_zone->zone_id;
1569                 /*
1570                  * Side-effects for SIGKILL and jobcontrol signals.
1571                  */
1572                 if (sig == SIGKILL) {
1573                         p->p_flag |= SKILLED;
1574                         p->p_flag &= ~SEXTKILLED;
1575                 } else if (sig == SIGCONT) {
1576                         p->p_flag |= SSCONT;
1577                         sigdelq(p, NULL, SIGSTOP);
1578                         sigdelq(p, NULL, SIGTSTP);
1579                         sigdelq(p, NULL, SIGTTOU);
1580                         sigdelq(p, NULL, SIGTTIN);
1581                         sigdiffset(&p->p_sig, &stopdefault);
1582                         sigdiffset(&p->p_extsig, &stopdefault);
1583                         if ((tx = p->p_tlist) != NULL) {
1584                                 do {
1585                                         sigdelq(p, tx, SIGSTOP);
1586                                         sigdelq(p, tx, SIGTSTP);
1587                                         sigdelq(p, tx, SIGTTOU);
1588                                         sigdelq(p, tx, SIGTTIN);
1589                                         sigdiffset(&tx->t_sig, &stopdefault);
1590                                         sigdiffset(&tx->t_extsig, &stopdefault);
1591                                 } while ((tx = tx->t_forw) != p->p_tlist);
1592                         }
1593                 } else if (sigismember(&stopdefault, sig)) {
1594                         if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1595                             (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1596                                 p->p_flag &= ~SSCONT;
1597                         sigdelq(p, NULL, SIGCONT);
1598                         sigdelset(&p->p_sig, SIGCONT);
1599                         sigdelset(&p->p_extsig, SIGCONT);
1600                         if ((tx = p->p_tlist) != NULL) {
1601                                 do {
1602                                         sigdelq(p, tx, SIGCONT);
1603                                         sigdelset(&tx->t_sig, SIGCONT);
1604                                         sigdelset(&tx->t_extsig, SIGCONT);
1605                                 } while ((tx = tx->t_forw) != p->p_tlist);
1606                         }
1607                 }
1608                 thread_lock(t);
1609                 if (ISWAKEABLE(t) || ISWAITING(t)) {
1610                         /* Set signaled sleeping/waiting lwp running */
1611                         setrun_locked(t);
1612                 } else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1613                         /* If SIGKILL, set stopped lwp running */
1614                         p->p_stopsig = 0;
1615                         t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
1616                         t->t_dtrace_stop = 0;
1617                         setrun_locked(t);
1618                 }
1619                 t->t_sig_check = 1;  /* so ISSIG will be done */
1620                 thread_unlock(t);
1621                 /*
1622                  * More jobcontrol side-effects.
1623                  */
1624                 if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1625                         p->p_stopsig = 0;
1626                         do {
1627                                 thread_lock(tx);
1628                                 if (tx->t_state == TS_STOPPED &&
1629                                     tx->t_whystop == PR_JOBCONTROL) {
1630                                         tx->t_schedflag |= TS_XSTART;
1631                                         setrun_locked(tx);
1632                                 }
1633                                 thread_unlock(tx);
1634                         } while ((tx = tx->t_forw) != p->p_tlist);
1635                 }
1636         }
1637         return (error);
1638 }
1639 
1640 int
1641 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1642 {
1643         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1644         prcommon_t *pcp = pnp->pr_common;
1645         proc_t *p = pcp->prc_proc;
1646         k_siginfo_t info;
1647 
1648         if (sig <= 0 || sig >= nsig)
1649                 return (EINVAL);
1650 
1651         bzero(&info, sizeof (info));
1652         info.si_signo = sig;
1653         info.si_code = SI_USER;
1654         info.si_pid = curproc->p_pid;
1655         info.si_ctid = PRCTID(curproc);
1656         info.si_zoneid = getzoneid();
1657         info.si_uid = crgetruid(cr);
1658         sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1659             pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1660 
1661         return (0);
1662 }
1663 
1664 int
1665 pr_unkill(prnode_t *pnp, int sig)
1666 {
1667         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1668         prcommon_t *pcp = pnp->pr_common;
1669         proc_t *p = pcp->prc_proc;
1670         sigqueue_t *infop = NULL;
1671 
1672         if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1673                 return (EINVAL);
1674 
1675         if (pcp->prc_flags & PRC_LWP)
1676                 sigdeq(p, pcp->prc_thread, sig, &infop);
1677         else
1678                 sigdeq(p, NULL, sig, &infop);
1679 
1680         if (infop)
1681                 siginfofree(infop);
1682 
1683         return (0);
1684 }
1685 
1686 int
1687 pr_nice(proc_t *p, int nice, cred_t *cr)
1688 {
1689         kthread_t *t;
1690         int err;
1691         int error = 0;
1692 
1693         t = p->p_tlist;
1694         do {
1695                 ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1696                 err = CL_DONICE(t, cr, nice, (int *)NULL);
1697                 schedctl_set_cidpri(t);
1698                 if (error == 0)
1699                         error = err;
1700         } while ((t = t->t_forw) != p->p_tlist);
1701 
1702         return (error);
1703 }
1704 
1705 void
1706 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1707 {
1708         user_t *up = PTOU(p);
1709 
1710         if (entry) {
1711                 prassignset(&up->u_entrymask, sysset);
1712         } else {
1713                 prassignset(&up->u_exitmask, sysset);
1714         }
1715         if (!prisempty(&up->u_entrymask) ||
1716             !prisempty(&up->u_exitmask)) {
1717                 up->u_systrap = 1;
1718                 p->p_proc_flag |= P_PR_TRACE;
1719                 set_proc_sys(p);        /* set pre and post-sys flags */
1720         } else {
1721                 up->u_systrap = 0;
1722                 if (sigisempty(&p->p_sigmask) &&
1723                     prisempty(&p->p_fltmask))
1724                         p->p_proc_flag &= ~P_PR_TRACE;
1725         }
1726 }
1727 
1728 #define ALLFLAGS        \
1729         (PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1730 
1731 int
1732 pr_set(proc_t *p, long flags)
1733 {
1734         if ((p->p_flag & SSYS) || p->p_as == &kas)
1735                 return (EBUSY);
1736 
1737         if (flags & ~ALLFLAGS)
1738                 return (EINVAL);
1739 
1740         if (flags & PR_FORK)
1741                 p->p_proc_flag |= P_PR_FORK;
1742         if (flags & PR_RLC)
1743                 p->p_proc_flag |= P_PR_RUNLCL;
1744         if (flags & PR_KLC)
1745                 p->p_proc_flag |= P_PR_KILLCL;
1746         if (flags & PR_ASYNC)
1747                 p->p_proc_flag |= P_PR_ASYNC;
1748         if (flags & PR_BPTADJ)
1749                 p->p_proc_flag |= P_PR_BPTADJ;
1750         if (flags & PR_MSACCT)
1751                 if ((p->p_flag & SMSACCT) == 0)
1752                         estimate_msacct(p->p_tlist, gethrtime());
1753         if (flags & PR_MSFORK)
1754                 p->p_flag |= SMSFORK;
1755         if (flags & PR_PTRACE) {
1756                 p->p_proc_flag |= P_PR_PTRACE;
1757                 /* ptraced process must die if parent dead */
1758                 if (p->p_ppid == 1)
1759                         sigtoproc(p, NULL, SIGKILL);
1760         }
1761 
1762         return (0);
1763 }
1764 
1765 int
1766 pr_unset(proc_t *p, long flags)
1767 {
1768         if ((p->p_flag & SSYS) || p->p_as == &kas)
1769                 return (EBUSY);
1770 
1771         if (flags & ~ALLFLAGS)
1772                 return (EINVAL);
1773 
1774         if (flags & PR_FORK)
1775                 p->p_proc_flag &= ~P_PR_FORK;
1776         if (flags & PR_RLC)
1777                 p->p_proc_flag &= ~P_PR_RUNLCL;
1778         if (flags & PR_KLC)
1779                 p->p_proc_flag &= ~P_PR_KILLCL;
1780         if (flags & PR_ASYNC)
1781                 p->p_proc_flag &= ~P_PR_ASYNC;
1782         if (flags & PR_BPTADJ)
1783                 p->p_proc_flag &= ~P_PR_BPTADJ;
1784         if (flags & PR_MSACCT)
1785                 disable_msacct(p);
1786         if (flags & PR_MSFORK)
1787                 p->p_flag &= ~SMSFORK;
1788         if (flags & PR_PTRACE)
1789                 p->p_proc_flag &= ~P_PR_PTRACE;
1790 
1791         return (0);
1792 }
1793 
1794 static int
1795 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1796 {
1797         proc_t *p = pnp->pr_common->prc_proc;
1798         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1799 
1800         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1801                 thread_unlock(t);
1802                 return (EBUSY);
1803         }
1804         if (!prhasfp()) {
1805                 thread_unlock(t);
1806                 return (EINVAL);        /* No FP support */
1807         }
1808 
1809         /* drop p_lock while touching the lwp's stack */
1810         thread_unlock(t);
1811         mutex_exit(&p->p_lock);
1812         prsetprfpregs(ttolwp(t), prfpregset);
1813         mutex_enter(&p->p_lock);
1814 
1815         return (0);
1816 }
1817 
#ifdef  _SYSCALL32_IMPL
/*
 * 32-bit counterpart of pr_setfpregs(): load the floating-point registers
 * of the lwp selected by pnp from a prfpregset32_t.
 *
 * Returns EBUSY unless the thread is ISTOPPED, VSTOPPED or DSTOPPED,
 * EINVAL when prhasfp() reports no floating-point support, else 0.
 * p->p_lock is released around the register update and re-taken before
 * returning.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
        proc_t *p = pnp->pr_common->prc_proc;
        kthread_t *t = pr_thread(pnp);  /* returns locked thread */
        int error = 0;

        if (!(ISTOPPED(t) || VSTOPPED(t) || DSTOPPED(t)))
                error = EBUSY;
        else if (!prhasfp())
                error = EINVAL;         /* No FP support */

        thread_unlock(t);
        if (error != 0)
                return (error);

        /* drop p_lock while touching the lwp's stack */
        mutex_exit(&p->p_lock);
        prsetprfpregs32(ttolwp(t), prfpregset);
        mutex_enter(&p->p_lock);

        return (0);
}
#endif  /* _SYSCALL32_IMPL */
1843 
1844 /* ARGSUSED */
1845 static int
1846 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1847 {
1848         int error;
1849         proc_t *p = pnp->pr_common->prc_proc;
1850         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1851 
1852         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1853                 thread_unlock(t);
1854                 return (EBUSY);
1855         }
1856         thread_unlock(t);
1857 
1858         if (!prhasx(p))
1859                 return (EINVAL);        /* No extra register support */
1860 
1861         /* drop p_lock while touching the lwp's stack */
1862         mutex_exit(&p->p_lock);
1863         error = prsetprxregs(ttolwp(t), prxregset);
1864         mutex_enter(&p->p_lock);
1865 
1866         return (error);
1867 }
1868 
1869 static int
1870 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1871 {
1872         proc_t *p = pnp->pr_common->prc_proc;
1873         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1874 
1875         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1876                 thread_unlock(t);
1877                 return (EBUSY);
1878         }
1879 
1880         /* drop p_lock while touching the lwp's stack */
1881         thread_unlock(t);
1882         mutex_exit(&p->p_lock);
1883         prsvaddr(ttolwp(t), vaddr);
1884         mutex_enter(&p->p_lock);
1885 
1886         return (0);
1887 }
1888 
1889 void
1890 pr_sethold(prnode_t *pnp, sigset_t *sp)
1891 {
1892         proc_t *p = pnp->pr_common->prc_proc;
1893         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1894 
1895         schedctl_finish_sigblock(t);
1896         sigutok(sp, &t->t_hold);
1897         if (ISWAKEABLE(t) &&
1898             (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1899                 setrun_locked(t);
1900         t->t_sig_check = 1;  /* so thread will see new holdmask */
1901         thread_unlock(t);
1902 }
1903 
1904 void
1905 pr_setfault(proc_t *p, fltset_t *fltp)
1906 {
1907         prassignset(&p->p_fltmask, fltp);
1908         if (!prisempty(&p->p_fltmask))
1909                 p->p_proc_flag |= P_PR_TRACE;
1910         else if (sigisempty(&p->p_sigmask)) {
1911                 user_t *up = PTOU(p);
1912                 if (up->u_systrap == 0)
1913                         p->p_proc_flag &= ~P_PR_TRACE;
1914         }
1915 }
1916 
1917 static int
1918 pr_clearsig(prnode_t *pnp)
1919 {
1920         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1921         klwp_t *lwp = ttolwp(t);
1922 
1923         thread_unlock(t);
1924         if (lwp->lwp_cursig == SIGKILL)
1925                 return (EBUSY);
1926 
1927         /*
1928          * Discard current siginfo_t, if any.
1929          */
1930         lwp->lwp_cursig = 0;
1931         lwp->lwp_extsig = 0;
1932         if (lwp->lwp_curinfo) {
1933                 siginfofree(lwp->lwp_curinfo);
1934                 lwp->lwp_curinfo = NULL;
1935         }
1936 
1937         return (0);
1938 }
1939 
1940 static int
1941 pr_clearflt(prnode_t *pnp)
1942 {
1943         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1944 
1945         thread_unlock(t);
1946         ttolwp(t)->lwp_curflt = 0;
1947 
1948         return (0);
1949 }
1950 
1951 static int
1952 pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1953 {
1954         proc_t *p = pnp->pr_common->prc_proc;
1955         struct as *as = p->p_as;
1956         uintptr_t vaddr = pwp->pr_vaddr;
1957         size_t size = pwp->pr_size;
1958         int wflags = pwp->pr_wflags;
1959         ulong_t newpage = 0;
1960         struct watched_area *pwa;
1961         int error;
1962 
1963         *unlocked = 0;
1964 
1965         /*
1966          * Can't apply to a system process.
1967          */
1968         if ((p->p_flag & SSYS) || p->p_as == &kas)
1969                 return (EBUSY);
1970 
1971         /*
1972          * Verify that the address range does not wrap
1973          * and that only the proper flags were specified.
1974          */
1975         if ((wflags & ~WA_TRAPAFTER) == 0)
1976                 size = 0;
1977         if (vaddr + size < vaddr ||
1978             (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1979             ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1980                 return (EINVAL);
1981 
1982         /*
1983          * Don't let the address range go above as->a_userlimit.
1984          * There is no error here, just a limitation.
1985          */
1986         if (vaddr >= (uintptr_t)as->a_userlimit)
1987                 return (0);
1988         if (vaddr + size > (uintptr_t)as->a_userlimit)
1989                 size = (uintptr_t)as->a_userlimit - vaddr;
1990 
1991         /*
1992          * Compute maximum number of pages this will add.
1993          */
1994         if ((wflags & ~WA_TRAPAFTER) != 0) {
1995                 ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1996                 newpage = btopr(pagespan);
1997                 if (newpage > 2 * prnwatch)
1998                         return (E2BIG);
1999         }
2000 
2001         /*
2002          * Force the process to be fully stopped.
2003          */
2004         if (p == curproc) {
2005                 prunlock(pnp);
2006                 while (holdwatch() != 0)
2007                         continue;
2008                 if ((error = prlock(pnp, ZNO)) != 0) {
2009                         continuelwps(p);
2010                         *unlocked = 1;
2011                         return (error);
2012                 }
2013         } else {
2014                 pauselwps(p);
2015                 while (pr_allstopped(p, 0) > 0) {
2016                         /*
2017                          * This cv/mutex pair is persistent even
2018                          * if the process disappears after we
2019                          * unmark it and drop p->p_lock.
2020                          */
2021                         kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2022                         kmutex_t *mp = &p->p_lock;
2023 
2024                         prunmark(p);
2025                         (void) cv_wait(cv, mp);
2026                         mutex_exit(mp);
2027                         if ((error = prlock(pnp, ZNO)) != 0) {
2028                                 /*
2029                                  * Unpause the process if it exists.
2030                                  */
2031                                 p = pr_p_lock(pnp);
2032                                 mutex_exit(&pr_pidlock);
2033                                 if (p != NULL) {
2034                                         unpauselwps(p);
2035                                         prunlock(pnp);
2036                                 }
2037                                 *unlocked = 1;
2038                                 return (error);
2039                         }
2040                 }
2041         }
2042 
2043         /*
2044          * Drop p->p_lock in order to perform the rest of this.
2045          * The process is still locked with the P_PR_LOCK flag.
2046          */
2047         mutex_exit(&p->p_lock);
2048 
2049         pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
2050         pwa->wa_vaddr = (caddr_t)vaddr;
2051         pwa->wa_eaddr = (caddr_t)vaddr + size;
2052         pwa->wa_flags = (ulong_t)wflags;
2053 
2054         error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
2055             clear_watched_area(p, pwa) : set_watched_area(p, pwa);
2056 
2057         if (p == curproc) {
2058                 setallwatch();
2059                 mutex_enter(&p->p_lock);
2060                 continuelwps(p);
2061         } else {
2062                 mutex_enter(&p->p_lock);
2063                 unpauselwps(p);
2064         }
2065 
2066         return (error);
2067 }
2068 
/*
 * jobcontrol stopped, but with a /proc directed stop in effect:
 * true when the thread is TS_STOPPED for PR_JOBCONTROL and also has
 * TP_PRSTOP set in t_proc_flag.
 */
#define JDSTOPPED(t)    \
        ((t)->t_state == TS_STOPPED && \
        (t)->t_whystop == PR_JOBCONTROL && \
        ((t)->t_proc_flag & TP_PRSTOP))

/*
 * pr_agent() creates the agent lwp. If the process is exiting while
 * we are creating an agent lwp, then exitlwps() waits until the
 * agent has been created using prbarrier().
 *
 * prgregset supplies the initial general registers for the new lwp.
 *
 * *unlocked is set to 1 each time the process is released with prunlock()
 * and back to 0 after each successful prlock(), so on return it says
 * whether the process is still locked.
 *
 * Returns EBUSY if the agent cannot be created (see the conditions
 * below), ENOMEM if lwp_create() fails, and otherwise either the non-zero
 * error from the wait loop (prlock() failure, or ENOENT if the agent
 * vanished) or -1 when that error is zero.  An EINTR from pr_wait() is
 * converted to zero error, and therefore also yields -1.
 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
        proc_t *p = pnp->pr_common->prc_proc;
        prcommon_t *pcp;
        kthread_t *t;
        kthread_t *ct;
        klwp_t *clwp;
        k_sigset_t smask;
        int cid;
        void *bufp = NULL;
        int error;

        *unlocked = 0;

        /*
         * Cannot create the /proc agent lwp if :-
         * - the process is not fully stopped or directed to stop.
         * - there is an agent lwp already.
         * - the process has been killed.
         * - the process is exiting.
         * - it's a vfork(2) parent.
         */
        t = prchoose(p);        /* returns locked thread */
        ASSERT(t != NULL);

        if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
            p->p_agenttp != NULL ||
            (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
                thread_unlock(t);
                return (EBUSY);
        }

        thread_unlock(t);
        mutex_exit(&p->p_lock);

        /*
         * The agent starts with every signal held except those in
         * cantmask.  It is created in the TS_STOPPED state at the
         * priority of the thread chosen above.
         */
        sigfillset(&smask);
        sigdiffset(&smask, &cantmask);
        clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
            t->t_pri, &smask, NOCLASS, 0);
        if (clwp == NULL) {
                /* Return with p_lock held again, as on entry. */
                mutex_enter(&p->p_lock);
                return (ENOMEM);
        }
        prsetprregs(clwp, prgregset, 1);

        /*
         * Because abandoning the agent inside the target process leads to
         * a state that is essentially undebuggable, we record the psinfo of
         * the process creating the agent and hang that off of the lwp.
         */
        clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
        mutex_enter(&curproc->p_lock);
        prgetpsinfo(curproc, clwp->lwp_spymaster);
        mutex_exit(&curproc->p_lock);

        /*
         * We overload pr_time in the spymaster to denote the time at which the
         * agent was created.
         */
        gethrestime(&clwp->lwp_spymaster->pr_time);

retry:
        /*
         * Pre-allocate the scheduling-class buffer with KM_SLEEP before
         * taking p_lock, then verify the class did not change meanwhile.
         */
        cid = t->t_cid;
        (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
        mutex_enter(&p->p_lock);
        if (cid != t->t_cid) {
                /*
                 * Someone just changed this thread's scheduling class,
                 * so try pre-allocating the buffer again.  Hopefully we
                 * don't hit this often.
                 */
                mutex_exit(&p->p_lock);
                CL_FREE(cid, bufp);
                goto retry;
        }

        /* The agent takes its scheduling class from the chosen thread. */
        clwp->lwp_ap = clwp->lwp_arg;
        clwp->lwp_eosys = NORMALRETURN;
        ct = lwptot(clwp);
        ct->t_clfuncs = t->t_clfuncs;
        CL_FORK(t, ct, bufp);
        ct->t_cid = t->t_cid;
        ct->t_proc_flag |= TP_PRSTOP;
        /*
         * Setting t_sysnum to zero causes post_syscall()
         * to bypass all syscall checks and go directly to
         *      if (issig()) psig();
         * so that the agent lwp will stop in issig_forreal()
         * showing PR_REQUESTED.
         */
        ct->t_sysnum = 0;
        ct->t_post_sys = 1;
        ct->t_sig_check = 1;
        p->p_agenttp = ct;
        ct->t_proc_flag &= ~TP_HOLDLWP;

        /*
         * prc_mutex is taken before lwp_create_done() and is held on
         * entry to each pr_wait() call in the loop below.
         */
        pcp = pnp->pr_pcommon;
        mutex_enter(&pcp->prc_mutex);

        lwp_create_done(ct);

        /*
         * Don't return until the agent is stopped on PR_REQUESTED.
         */

        for (;;) {
                prunlock(pnp);
                *unlocked = 1;

                /*
                 * Wait for the agent to stop and notify us.
                 * If we've been interrupted, return that information.
                 */
                error = pr_wait(pcp, NULL, 0);
                if (error == EINTR) {
                        error = 0;
                        break;
                }

                /*
                 * Confirm that the agent LWP has stopped.
                 */

                if ((error = prlock(pnp, ZNO)) != 0)
                        break;
                *unlocked = 0;

                /*
                 * Since we dropped the lock on the process, the agent
                 * may have disappeared or changed. Grab the current
                 * agent and fail if it has disappeared.
                 */
                if ((ct = p->p_agenttp) == NULL) {
                        error = ENOENT;
                        break;
                }

                mutex_enter(&pcp->prc_mutex);
                thread_lock(ct);

                if (ISTOPPED(ct)) {
                        thread_unlock(ct);
                        mutex_exit(&pcp->prc_mutex);
                        break;
                }

                /* Not stopped yet: keep prc_mutex and wait again. */
                thread_unlock(ct);
        }

        /* A zero error is reported to the caller as -1. */
        return (error ? error : -1);
}
2232 
2233 static int
2234 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2235 {
2236         caddr_t base = (caddr_t)pio->pio_base;
2237         size_t cnt = pio->pio_len;
2238         uintptr_t offset = (uintptr_t)pio->pio_offset;
2239         struct uio auio;
2240         struct iovec aiov;
2241         int error = 0;
2242 
2243         if ((p->p_flag & SSYS) || p->p_as == &kas)
2244                 error = EIO;
2245         else if ((base + cnt) < base || (offset + cnt) < offset)
2246                 error = EINVAL;
2247         else if (cnt != 0) {
2248                 aiov.iov_base = base;
2249                 aiov.iov_len = cnt;
2250 
2251                 auio.uio_loffset = offset;
2252                 auio.uio_iov = &aiov;
2253                 auio.uio_iovcnt = 1;
2254                 auio.uio_resid = cnt;
2255                 auio.uio_segflg = UIO_USERSPACE;
2256                 auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2257                 auio.uio_fmode = FREAD|FWRITE;
2258                 auio.uio_extflg = UIO_COPY_DEFAULT;
2259 
2260                 mutex_exit(&p->p_lock);
2261                 error = prusrio(p, rw, &auio, 0);
2262                 mutex_enter(&p->p_lock);
2263 
2264                 /*
2265                  * We have no way to return the i/o count,
2266                  * like read() or write() would do, so we
2267                  * return an error if the i/o was truncated.
2268                  */
2269                 if (auio.uio_resid != 0 && error == 0)
2270                         error = EIO;
2271         }
2272 
2273         return (error);
2274 }
2275 
2276 static int
2277 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2278 {
2279         kthread_t *t;
2280         cred_t *oldcred;
2281         cred_t *newcred;
2282         uid_t oldruid;
2283         int error;
2284         zone_t *zone = crgetzone(cr);
2285 
2286         if (!VALID_UID(prcred->pr_euid, zone) ||
2287             !VALID_UID(prcred->pr_ruid, zone) ||
2288             !VALID_UID(prcred->pr_suid, zone) ||
2289             !VALID_GID(prcred->pr_egid, zone) ||
2290             !VALID_GID(prcred->pr_rgid, zone) ||
2291             !VALID_GID(prcred->pr_sgid, zone))
2292                 return (EINVAL);
2293 
2294         if (dogrps) {
2295                 int ngrp = prcred->pr_ngroups;
2296                 int i;
2297 
2298                 if (ngrp < 0 || ngrp > ngroups_max)
2299                         return (EINVAL);
2300 
2301                 for (i = 0; i < ngrp; i++) {
2302                         if (!VALID_GID(prcred->pr_groups[i], zone))
2303                                 return (EINVAL);
2304                 }
2305         }
2306 
2307         error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2308 
2309         if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2310                 error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2311 
2312         if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2313             prcred->pr_suid != prcred->pr_ruid)
2314                 error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2315 
2316         if (error)
2317                 return (error);
2318 
2319         mutex_exit(&p->p_lock);
2320 
2321         /* hold old cred so it doesn't disappear while we dup it */
2322         mutex_enter(&p->p_crlock);
2323         crhold(oldcred = p->p_cred);
2324         mutex_exit(&p->p_crlock);
2325         newcred = crdup(oldcred);
2326         oldruid = crgetruid(oldcred);
2327         crfree(oldcred);
2328 
2329         /* Error checking done above */
2330         (void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2331             prcred->pr_suid);
2332         (void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2333             prcred->pr_sgid);
2334 
2335         if (dogrps) {
2336                 (void) crsetgroups(newcred, prcred->pr_ngroups,
2337                     prcred->pr_groups);
2338 
2339         }
2340 
2341         mutex_enter(&p->p_crlock);
2342         oldcred = p->p_cred;
2343         p->p_cred = newcred;
2344         mutex_exit(&p->p_crlock);
2345         crfree(oldcred);
2346 
2347         /*
2348          * Keep count of processes per uid consistent.
2349          */
2350         if (oldruid != prcred->pr_ruid) {
2351                 zoneid_t zoneid = crgetzoneid(newcred);
2352 
2353                 mutex_enter(&pidlock);
2354                 upcount_dec(oldruid, zoneid);
2355                 upcount_inc(prcred->pr_ruid, zoneid);
2356                 mutex_exit(&pidlock);
2357         }
2358 
2359         /*
2360          * Broadcast the cred change to the threads.
2361          */
2362         mutex_enter(&p->p_lock);
2363         t = p->p_tlist;
2364         do {
2365                 t->t_pre_sys = 1; /* so syscall will get new cred */
2366         } while ((t = t->t_forw) != p->p_tlist);
2367 
2368         return (0);
2369 }
2370 
2371 /*
2372  * Change process credentials to specified zone.  Used to temporarily
2373  * set a process to run in the global zone; only transitions between
2374  * the process's actual zone and the global zone are allowed.
2375  */
2376 static int
2377 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2378 {
2379         kthread_t *t;
2380         cred_t *oldcred;
2381         cred_t *newcred;
2382         zone_t *zptr;
2383         zoneid_t oldzoneid;
2384 
2385         if (secpolicy_zone_config(cr) != 0)
2386                 return (EPERM);
2387         if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2388                 return (EINVAL);
2389         /*
2390          * We cannot hold p_lock when we call zone_find_by_id since that can
2391          * lead to a deadlock. zone_find_by_id() takes zonehash_lock.
2392          * zone_enter() can hold the zonehash_lock and needs p_lock when it
2393          * calls task_join.
2394          */
2395         mutex_exit(&p->p_lock);
2396         if ((zptr = zone_find_by_id(zoneid)) == NULL) {
2397                 mutex_enter(&p->p_lock);
2398                 return (EINVAL);
2399         }
2400         mutex_enter(&p->p_crlock);
2401         oldcred = p->p_cred;
2402         crhold(oldcred);
2403         mutex_exit(&p->p_crlock);
2404         newcred = crdup(oldcred);
2405         oldzoneid = crgetzoneid(oldcred);
2406         crfree(oldcred);
2407 
2408         crsetzone(newcred, zptr);
2409         zone_rele(zptr);
2410 
2411         mutex_enter(&p->p_crlock);
2412         oldcred = p->p_cred;
2413         p->p_cred = newcred;
2414         mutex_exit(&p->p_crlock);
2415         crfree(oldcred);
2416 
2417         /*
2418          * The target process is changing zones (according to its cred), so
2419          * update the per-zone upcounts, which are based on process creds.
2420          */
2421         if (oldzoneid != zoneid) {
2422                 uid_t ruid = crgetruid(newcred);
2423 
2424                 mutex_enter(&pidlock);
2425                 upcount_dec(ruid, oldzoneid);
2426                 upcount_inc(ruid, zoneid);
2427                 mutex_exit(&pidlock);
2428         }
2429         /*
2430          * Broadcast the cred change to the threads.
2431          */
2432         mutex_enter(&p->p_lock);
2433         t = p->p_tlist;
2434         do {
2435                 t->t_pre_sys = 1;    /* so syscall will get new cred */
2436         } while ((t = t->t_forw) != p->p_tlist);
2437 
2438         return (0);
2439 }
2440 
2441 static int
2442 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2443 {
2444         kthread_t *t;
2445         int err;
2446 
2447         ASSERT(MUTEX_HELD(&p->p_lock));
2448 
2449         if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2450                 /*
2451                  * Broadcast the cred change to the threads.
2452                  */
2453                 t = p->p_tlist;
2454                 do {
2455                         t->t_pre_sys = 1; /* so syscall will get new cred */
2456                 } while ((t = t->t_forw) != p->p_tlist);
2457         }
2458 
2459         return (err);
2460 }
2461 
2462 /*
2463  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2464  * terminate or perform an exec(2).
2465  *
2466  * Returns 0 if the process is fully stopped except for the current thread (if
2467  * we are operating on our own process), 1 otherwise.
2468  *
2469  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2470  * See holdwatch() for details.
2471  */
2472 int
2473 pr_allstopped(proc_t *p, int watchstop)
2474 {
2475         kthread_t *t;
2476         int rv = 0;
2477 
2478         ASSERT(MUTEX_HELD(&p->p_lock));
2479 
2480         if (p->p_flag & SVFWAIT) /* waiting for vfork'd child to exec */
2481                 return (-1);
2482 
2483         if ((t = p->p_tlist) != NULL) {
2484                 do {
2485                         if (t == curthread || VSTOPPED(t) ||
2486                             (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2487                                 continue;
2488                         thread_lock(t);
2489                         switch (t->t_state) {
2490                         case TS_ZOMB:
2491                         case TS_STOPPED:
2492                                 break;
2493                         case TS_SLEEP:
2494                                 if (!(t->t_flag & T_WAKEABLE) ||
2495                                     t->t_wchan0 == NULL)
2496                                         rv = 1;
2497                                 break;
2498                         default:
2499                                 rv = 1;
2500                                 break;
2501                         }
2502                         thread_unlock(t);
2503                 } while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2504         }
2505 
2506         return (rv);
2507 }
2508 
2509 /*
2510  * Cause all lwps in the process to pause (for watchpoint operations).
2511  */
2512 static void
2513 pauselwps(proc_t *p)
2514 {
2515         kthread_t *t;
2516 
2517         ASSERT(MUTEX_HELD(&p->p_lock));
2518         ASSERT(p != curproc);
2519 
2520         if ((t = p->p_tlist) != NULL) {
2521                 do {
2522                         thread_lock(t);
2523                         t->t_proc_flag |= TP_PAUSE;
2524                         aston(t);
2525                         if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2526                             ISWAITING(t)) {
2527                                 setrun_locked(t);
2528                         }
2529                         prpokethread(t);
2530                         thread_unlock(t);
2531                 } while ((t = t->t_forw) != p->p_tlist);
2532         }
2533 }
2534 
2535 /*
2536  * undo the effects of pauselwps()
2537  */
2538 static void
2539 unpauselwps(proc_t *p)
2540 {
2541         kthread_t *t;
2542 
2543         ASSERT(MUTEX_HELD(&p->p_lock));
2544         ASSERT(p != curproc);
2545 
2546         if ((t = p->p_tlist) != NULL) {
2547                 do {
2548                         thread_lock(t);
2549                         t->t_proc_flag &= ~TP_PAUSE;
2550                         if (t->t_state == TS_STOPPED) {
2551                                 t->t_schedflag |= TS_UNPAUSE;
2552                                 t->t_dtrace_stop = 0;
2553                                 setrun_locked(t);
2554                         }
2555                         thread_unlock(t);
2556                 } while ((t = t->t_forw) != p->p_tlist);
2557         }
2558 }
2559 
2560 /*
2561  * Cancel all watched areas.  Called from prclose().
2562  */
2563 proc_t *
2564 pr_cancel_watch(prnode_t *pnp)
2565 {
2566         proc_t *p = pnp->pr_pcommon->prc_proc;
2567         struct as *as;
2568         kthread_t *t;
2569 
2570         ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2571 
2572         if (!pr_watch_active(p))
2573                 return (p);
2574 
2575         /*
2576          * Pause the process before dealing with the watchpoints.
2577          */
2578         if (p == curproc) {
2579                 prunlock(pnp);
2580                 while (holdwatch() != 0)
2581                         continue;
2582                 p = pr_p_lock(pnp);
2583                 mutex_exit(&pr_pidlock);
2584                 ASSERT(p == curproc);
2585         } else {
2586                 pauselwps(p);
2587                 while (p != NULL && pr_allstopped(p, 0) > 0) {
2588                         /*
2589                          * This cv/mutex pair is persistent even
2590                          * if the process disappears after we
2591                          * unmark it and drop p->p_lock.
2592                          */
2593                         kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2594                         kmutex_t *mp = &p->p_lock;
2595 
2596                         prunmark(p);
2597                         (void) cv_wait(cv, mp);
2598                         mutex_exit(mp);
2599                         p = pr_p_lock(pnp);  /* NULL if process disappeared */
2600                         mutex_exit(&pr_pidlock);
2601                 }
2602         }
2603 
2604         if (p == NULL)          /* the process disappeared */
2605                 return (NULL);
2606 
2607         ASSERT(p == pnp->pr_pcommon->prc_proc);
2608         ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2609 
2610         if (pr_watch_active(p)) {
2611                 pr_free_watchpoints(p);
2612                 if ((t = p->p_tlist) != NULL) {
2613                         do {
2614                                 watch_disable(t);
2615 
2616                         } while ((t = t->t_forw) != p->p_tlist);
2617                 }
2618         }
2619 
2620         if ((as = p->p_as) != NULL) {
2621                 avl_tree_t *tree;
2622                 struct watched_page *pwp;
2623 
2624                 /*
2625                  * If this is the parent of a vfork, the watched page
2626                  * list has been moved temporarily to p->p_wpage.
2627                  */
2628                 if (avl_numnodes(&p->p_wpage) != 0)
2629                         tree = &p->p_wpage;
2630                 else
2631                         tree = &as->a_wpage;
2632 
2633                 mutex_exit(&p->p_lock);
2634                 AS_LOCK_ENTER(as, RW_WRITER);
2635 
2636                 for (pwp = avl_first(tree); pwp != NULL;
2637                     pwp = AVL_NEXT(tree, pwp)) {
2638                         pwp->wp_read = 0;
2639                         pwp->wp_write = 0;
2640                         pwp->wp_exec = 0;
2641                         if ((pwp->wp_flags & WP_SETPROT) == 0) {
2642                                 pwp->wp_flags |= WP_SETPROT;
2643                                 pwp->wp_prot = pwp->wp_oprot;
2644                                 pwp->wp_list = p->p_wprot;
2645                                 p->p_wprot = pwp;
2646                         }
2647                 }
2648 
2649                 AS_LOCK_EXIT(as);
2650                 mutex_enter(&p->p_lock);
2651         }
2652 
2653         /*
2654          * Unpause the process now.
2655          */
2656         if (p == curproc)
2657                 continuelwps(p);
2658         else
2659                 unpauselwps(p);
2660 
2661         return (p);
2662 }