Print this page
manifest


   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015, Joyent, Inc.

  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/uio.h>
  33 #include <sys/param.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/cred.h>
  36 #include <sys/policy.h>
  37 #include <sys/debug.h>
  38 #include <sys/errno.h>
  39 #include <sys/file.h>
  40 #include <sys/inline.h>
  41 #include <sys/kmem.h>
  42 #include <sys/proc.h>
  43 #include <sys/brand.h>
  44 #include <sys/regset.h>
  45 #include <sys/sysmacros.h>
  46 #include <sys/systm.h>
  47 #include <sys/vfs.h>
  48 #include <sys/vnode.h>
  49 #include <sys/signal.h>
  50 #include <sys/auxv.h>
  51 #include <sys/user.h>
  52 #include <sys/class.h>
  53 #include <sys/fault.h>
  54 #include <sys/syscall.h>
  55 #include <sys/procfs.h>
  56 #include <sys/zone.h>
  57 #include <sys/copyops.h>
  58 #include <sys/schedctl.h>
  59 #include <vm/as.h>
  60 #include <vm/seg.h>
  61 #include <fs/proc/prdata.h>
  62 #include <sys/contract/process_impl.h>

  63 
  64 static  void    pr_settrace(proc_t *, sigset_t *);
  65 static  int     pr_setfpregs(prnode_t *, prfpregset_t *);
  66 #if defined(__sparc)
  67 static  int     pr_setxregs(prnode_t *, prxregset_t *);
  68 static  int     pr_setasrs(prnode_t *, asrset_t);
  69 #endif
  70 static  int     pr_setvaddr(prnode_t *, caddr_t);
  71 static  int     pr_clearsig(prnode_t *);
  72 static  int     pr_clearflt(prnode_t *);
  73 static  int     pr_watch(prnode_t *, prwatch_t *, int *);
  74 static  int     pr_agent(prnode_t *, prgregset_t, int *);
  75 static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
  76 static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
  77 static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
  78 static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
  79 static  void    pauselwps(proc_t *);
  80 static  void    unpauselwps(proc_t *);
  81 







  82 typedef union {
  83         long            sig;            /* PCKILL, PCUNKILL */
  84         long            nice;           /* PCNICE */
  85         long            timeo;          /* PCTWSTOP */
  86         ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
  87         caddr_t         vaddr;          /* PCSVADDR */
  88         siginfo_t       siginfo;        /* PCSSIG */
  89         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
  90         fltset_t        fltset;         /* PCSFAULT */
  91         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
  92         prgregset_t     prgregset;      /* PCSREG, PCAGENT */
  93         prfpregset_t    prfpregset;     /* PCSFPREG */
  94 #if defined(__sparc)
  95         prxregset_t     prxregset;      /* PCSXREG */
  96         asrset_t        asrset;         /* PCSASRS */
  97 #endif
  98         prwatch_t       prwatch;        /* PCWATCH */
  99         priovec_t       priovec;        /* PCREAD, PCWRITE */
 100         prcred_t        prcred;         /* PCSCRED */
 101         prpriv_t        prpriv;         /* PCSPRIV */
 102         long            przoneid;       /* PCSZONE */
 103 } arg_t;
 104 
 105 static  int     pr_control(long, arg_t *, prnode_t *, cred_t *);



 106 
 107 static size_t
 108 ctlsize(long cmd, size_t resid, arg_t *argp)












 109 {
 110         size_t size = sizeof (long);
 111         size_t rnd;
 112         int ngrp;

 113 
 114         switch (cmd) {
 115         case PCNULL:
 116         case PCSTOP:
 117         case PCDSTOP:
 118         case PCWSTOP:
 119         case PCCSIG:
 120         case PCCFAULT:
 121                 break;
 122         case PCSSIG:
 123                 size += sizeof (siginfo_t);
 124                 break;
 125         case PCTWSTOP:
 126                 size += sizeof (long);
 127                 break;
 128         case PCKILL:
 129         case PCUNKILL:
 130         case PCNICE:
 131                 size += sizeof (long);
 132                 break;
 133         case PCRUN:
 134         case PCSET:
 135         case PCUNSET:
 136                 size += sizeof (ulong_t);
 137                 break;
 138         case PCSVADDR:
 139                 size += sizeof (caddr_t);
 140                 break;
 141         case PCSTRACE:
 142         case PCSHOLD:
 143                 size += sizeof (sigset_t);
 144                 break;
 145         case PCSFAULT:
 146                 size += sizeof (fltset_t);
 147                 break;
 148         case PCSENTRY:
 149         case PCSEXIT:
 150                 size += sizeof (sysset_t);
 151                 break;
 152         case PCSREG:
 153         case PCAGENT:
 154                 size += sizeof (prgregset_t);
 155                 break;
 156         case PCSFPREG:
 157                 size += sizeof (prfpregset_t);
 158                 break;
 159 #if defined(__sparc)
 160         case PCSXREG:
 161                 size += sizeof (prxregset_t);
 162                 break;
 163         case PCSASRS:
 164                 size += sizeof (asrset_t);
 165                 break;




























 166 #endif
 167         case PCWATCH:
 168                 size += sizeof (prwatch_t);
 169                 break;
 170         case PCREAD:
 171         case PCWRITE:
 172                 size += sizeof (priovec_t);
 173                 break;
 174         case PCSCRED:
 175                 size += sizeof (prcred_t);
 176                 break;
 177         case PCSCREDX:

















 178                 /*
 179                  * We cannot derefence the pr_ngroups fields if it
 180                  * we don't have enough data.
 181                  */
 182                 if (resid < size + sizeof (prcred_t) - sizeof (gid_t))


 183                         return (0);
 184                 ngrp = argp->prcred.pr_ngroups;
 185                 if (ngrp < 0 || ngrp > ngroups_max)
 186                         return (0);
 187 
 188                 /* The result can be smaller than sizeof (prcred_t) */
 189                 size += sizeof (prcred_t) - sizeof (gid_t);
 190                 size += ngrp * sizeof (gid_t);
 191                 break;
 192         case PCSPRIV:
 193                 if (resid >= size + sizeof (prpriv_t))
 194                         size += priv_prgetprivsize(&argp->prpriv);
 195                 else
 196                         return (0);
 197                 break;
 198         case PCSZONE:
 199                 size += sizeof (long);
 200                 break;
 201         default:
 202                 return (0);
 203         }
 204 
 205         /* Round up to a multiple of long, unless exact amount written */
 206         if (size < resid) {
 207                 rnd = size & (sizeof (long) - 1);

 208 
 209                 if (rnd != 0)
 210                         size += sizeof (long) - rnd;





 211         }


 212 
 213         if (size > resid)








 214                 return (0);
 215         return (size);
 216 }
 217 























 218 /*
 219  * Control operations (lots).



















 220  */
 221 int
 222 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)


 223 {
 224 #define MY_BUFFER_SIZE \
 225                 100 > 1 + sizeof (arg_t) / sizeof (long) ? \
 226                 100 : 1 + sizeof (arg_t) / sizeof (long)
 227         long buf[MY_BUFFER_SIZE];
 228         long *bufp;
 229         size_t resid = 0;
 230         size_t size;
 231         prnode_t *pnp = VTOP(vp);
 232         int error;
 233         int locked = 0;
 234 
 235         while (uiop->uio_resid) {






 236                 /*
 237                  * Read several commands in one gulp.




 238                  */
 239                 bufp = buf;
 240                 if (resid) {    /* move incomplete command to front of buffer */
 241                         long *tail;
 242 
 243                         if (resid >= sizeof (buf))
 244                                 break;
 245                         tail = (long *)((char *)buf + sizeof (buf) - resid);
 246                         do {
 247                                 *bufp++ = *tail++;
 248                         } while ((resid -= sizeof (long)) != 0);
 249                 }
 250                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 251                 if (resid > uiop->uio_resid)
 252                         resid = uiop->uio_resid;
 253                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 254                         return (error);
 255                 resid += (char *)bufp - (char *)buf;
 256                 bufp = buf;
 257 
 258                 do {            /* loop over commands in buffer */
 259                         long cmd = bufp[0];
 260                         arg_t *argp = (arg_t *)&bufp[1];














 261 
 262                         size = ctlsize(cmd, resid, argp);
 263                         if (size == 0)  /* incomplete or invalid command */
 264                                 break;












 265                         /*
 266                          * Perform the specified control operation.


 267                          */
 268                         if (!locked) {
 269                                 if ((error = prlock(pnp, ZNO)) != 0)
 270                                         return (error);
 271                                 locked = 1;
 272                         }
 273                         if (error = pr_control(cmd, argp, pnp, cr)) {
 274                                 if (error == -1)        /* -1 is timeout */
 275                                         locked = 0;
 276                                 else
 277                                         return (error);

 278                         }
 279                         bufp = (long *)((char *)bufp + size);
 280                 } while ((resid -= size) != 0);
 281 
 282                 if (locked) {
 283                         prunlock(pnp);
 284                         locked = 0;
 285                 }







 286         }
 287         return (resid? EINVAL : 0);

































































































































 288 }
 289 
 290 static int
 291 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
 292 {
 293         prcommon_t *pcp;
 294         proc_t *p;
 295         int unlocked;
 296         int error = 0;

 297 
 298         if (cmd == PCNULL)
 299                 return (0);
 300 
 301         pcp = pnp->pr_common;
 302         p = pcp->prc_proc;
 303         ASSERT(p != NULL);
 304 
 305         /* System processes defy control. */
 306         if (p->p_flag & SSYS) {
 307                 prunlock(pnp);
 308                 return (EBUSY);
 309         }
 310 
 311         switch (cmd) {
 312 
 313         default:
 314                 error = EINVAL;
 315                 break;
 316 


 405                 {
 406                         kthread_t *t = pr_thread(pnp);
 407 
 408                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 409                                 thread_unlock(t);
 410                                 error = EBUSY;
 411                         } else {
 412                                 thread_unlock(t);
 413                                 mutex_exit(&p->p_lock);
 414                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 415                                 mutex_enter(&p->p_lock);
 416                         }
 417                         break;
 418                 }
 419 
 420         case PCSFPREG:  /* set floating-point registers */
 421                 error = pr_setfpregs(pnp, &argp->prfpregset);
 422                 break;
 423 
 424         case PCSXREG:   /* set extra registers */
 425 #if defined(__sparc)
 426                 error = pr_setxregs(pnp, &argp->prxregset);
 427 #else
 428                 error = EINVAL;
 429 #endif
 430                 break;
 431 
 432 #if defined(__sparc)
 433         case PCSASRS:   /* set ancillary state registers */
 434                 error = pr_setasrs(pnp, argp->asrset);
 435                 break;
 436 #endif
 437 
 438         case PCSVADDR:  /* set virtual address at which to resume */
 439                 error = pr_setvaddr(pnp, argp->vaddr);
 440                 break;
 441 
 442         case PCSHOLD:   /* set signal-hold mask */
 443                 pr_sethold(pnp, &argp->sigset);
 444                 break;
 445 
 446         case PCSFAULT:  /* set mask of traced faults */
 447                 pr_setfault(p, &argp->fltset);
 448                 break;
 449 
 450         case PCCSIG:    /* clear current signal */
 451                 error = pr_clearsig(pnp);
 452                 break;
 453 
 454         case PCCFAULT:  /* clear current fault */
 455                 error = pr_clearflt(pnp);
 456                 break;
 457 


 476                 break;
 477 
 478         case PCSCRED:   /* set the process credentials */
 479         case PCSCREDX:
 480                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 481                 break;
 482 
 483         case PCSPRIV:   /* set the process privileges */
 484                 error = pr_spriv(p, &argp->prpriv, cr);
 485                 break;
 486         case PCSZONE:   /* set the process's zoneid credentials */
 487                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 488                 break;
 489         }
 490 
 491         if (error)
 492                 prunlock(pnp);
 493         return (error);
 494 }
 495 







 496 #ifdef _SYSCALL32_IMPL
 497 
 498 typedef union {
 499         int32_t         sig;            /* PCKILL, PCUNKILL */
 500         int32_t         nice;           /* PCNICE */
 501         int32_t         timeo;          /* PCTWSTOP */
 502         uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
 503         caddr32_t       vaddr;          /* PCSVADDR */
 504         siginfo32_t     siginfo;        /* PCSSIG */
 505         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
 506         fltset_t        fltset;         /* PCSFAULT */
 507         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
 508         prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
 509         prfpregset32_t  prfpregset;     /* PCSFPREG */
 510 #if defined(__sparc)
 511         prxregset_t     prxregset;      /* PCSXREG */
 512 #endif
 513         prwatch32_t     prwatch;        /* PCWATCH */
 514         priovec32_t     priovec;        /* PCREAD, PCWRITE */
 515         prcred32_t      prcred;         /* PCSCRED */
 516         prpriv_t        prpriv;         /* PCSPRIV */
 517         int32_t         przoneid;       /* PCSZONE */
 518 } arg32_t;
 519 
 520 static  int     pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
 521 static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 522 
 523 /*
 524  * Note that while ctlsize32() can use argp, it must do so only in a way
 525  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
 526  * to an array of 32-bit values and only 32-bit alignment is ensured.
 527  */
 528 static size_t
 529 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
 530 {
 531         size_t size = sizeof (int32_t);
 532         size_t rnd;
 533         int ngrp;
 534 
 535         switch (cmd) {
 536         case PCNULL:
 537         case PCSTOP:
 538         case PCDSTOP:
 539         case PCWSTOP:
 540         case PCCSIG:
 541         case PCCFAULT:
 542                 break;
 543         case PCSSIG:
 544                 size += sizeof (siginfo32_t);
 545                 break;
 546         case PCTWSTOP:
 547                 size += sizeof (int32_t);
 548                 break;
 549         case PCKILL:
 550         case PCUNKILL:
 551         case PCNICE:
 552                 size += sizeof (int32_t);
 553                 break;
 554         case PCRUN:
 555         case PCSET:
 556         case PCUNSET:
 557                 size += sizeof (uint32_t);
 558                 break;
 559         case PCSVADDR:
 560                 size += sizeof (caddr32_t);
 561                 break;
 562         case PCSTRACE:
 563         case PCSHOLD:
 564                 size += sizeof (sigset_t);
 565                 break;
 566         case PCSFAULT:
 567                 size += sizeof (fltset_t);
 568                 break;
 569         case PCSENTRY:
 570         case PCSEXIT:
 571                 size += sizeof (sysset_t);
 572                 break;
 573         case PCSREG:
 574         case PCAGENT:
 575                 size += sizeof (prgregset32_t);
 576                 break;
 577         case PCSFPREG:
 578                 size += sizeof (prfpregset32_t);
 579                 break;
 580 #if defined(__sparc)
 581         case PCSXREG:
 582                 size += sizeof (prxregset_t);
 583                 break;
 584 #endif
 585         case PCWATCH:
 586                 size += sizeof (prwatch32_t);
 587                 break;
 588         case PCREAD:
 589         case PCWRITE:
 590                 size += sizeof (priovec32_t);
 591                 break;
 592         case PCSCRED:
 593                 size += sizeof (prcred32_t);
 594                 break;
 595         case PCSCREDX:
 596                 /*
 597                  * We cannot derefence the pr_ngroups fields if it
 598                  * we don't have enough data.
 599                  */
 600                 if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
 601                         return (0);
 602                 ngrp = argp->prcred.pr_ngroups;
 603                 if (ngrp < 0 || ngrp > ngroups_max)
 604                         return (0);
 605 
 606                 /* The result can be smaller than sizeof (prcred32_t) */
 607                 size += sizeof (prcred32_t) - sizeof (gid32_t);
 608                 size += ngrp * sizeof (gid32_t);
 609                 break;
 610         case PCSPRIV:
 611                 if (resid >= size + sizeof (prpriv_t))
 612                         size += priv_prgetprivsize(&argp->prpriv);
 613                 else
 614                         return (0);
 615                 break;
 616         case PCSZONE:
 617                 size += sizeof (int32_t);
 618                 break;
 619         default:
 620                 return (0);
 621         }
 622 
 623         /* Round up to a multiple of int32_t */
 624         rnd = size & (sizeof (int32_t) - 1);
 625 
 626         if (rnd != 0)
 627                 size += sizeof (int32_t) - rnd;
 628 
 629         if (size > resid)
 630                 return (0);
 631         return (size);
 632 }
 633 
 634 /*
 635  * Control operations (lots).


 636  */
 637 int
 638 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
 639 {
 640 #define MY_BUFFER_SIZE32 \
 641                 100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
 642                 100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
 643         int32_t buf[MY_BUFFER_SIZE32];
 644         int32_t *bufp;
 645         arg32_t arg;
 646         size_t resid = 0;
 647         size_t size;
 648         prnode_t *pnp = VTOP(vp);
 649         int error;
 650         int locked = 0;



















 651 
 652         while (uiop->uio_resid) {
 653                 /*
 654                  * Read several commands in one gulp.
 655                  */
 656                 bufp = buf;
 657                 if (resid) {    /* move incomplete command to front of buffer */
 658                         int32_t *tail;
 659 
 660                         if (resid >= sizeof (buf))
 661                                 break;
 662                         tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
 663                         do {
 664                                 *bufp++ = *tail++;
 665                         } while ((resid -= sizeof (int32_t)) != 0);
 666                 }
 667                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 668                 if (resid > uiop->uio_resid)
 669                         resid = uiop->uio_resid;
 670                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 671                         return (error);
 672                 resid += (char *)bufp - (char *)buf;
 673                 bufp = buf;
 674 
 675                 do {            /* loop over commands in buffer */
 676                         int32_t cmd = bufp[0];
 677                         arg32_t *argp = (arg32_t *)&bufp[1];
 678 
 679                         size = ctlsize32(cmd, resid, argp);
 680                         if (size == 0)  /* incomplete or invalid command */
 681                                 break;
 682                         /*
 683                          * Perform the specified control operation.
 684                          */
 685                         if (!locked) {
 686                                 if ((error = prlock(pnp, ZNO)) != 0)
 687                                         return (error);
 688                                 locked = 1;
 689                         }
 690 
 691                         /*
 692                          * Since some members of the arg32_t union contain
 693                          * 64-bit values (which must be 64-bit aligned), we
 694                          * can't simply pass a pointer to the structure as
 695                          * it may be unaligned. Note that we do pass the
 696                          * potentially unaligned structure to ctlsize32()
 697                          * above, but that uses it a way that makes no
 698                          * assumptions about alignment.
 699                          */
 700                         ASSERT(size - sizeof (cmd) <= sizeof (arg));
 701                         bcopy(argp, &arg, size - sizeof (cmd));
 702 
 703                         if (error = pr_control32(cmd, &arg, pnp, cr)) {
 704                                 if (error == -1)        /* -1 is timeout */
 705                                         locked = 0;
 706                                 else
 707                                         return (error);
 708                         }
 709                         bufp = (int32_t *)((char *)bufp + size);
 710                 } while ((resid -= size) != 0);
 711 
 712                 if (locked) {
 713                         prunlock(pnp);
 714                         locked = 0;
 715                 }
 716         }
 717         return (resid? EINVAL : 0);
 718 }
 719 
 720 static int
 721 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
 722 {
 723         prcommon_t *pcp;
 724         proc_t *p;
 725         int unlocked;
 726         int error = 0;

 727 
 728         if (cmd == PCNULL)
 729                 return (0);
 730 
 731         pcp = pnp->pr_common;
 732         p = pcp->prc_proc;
 733         ASSERT(p != NULL);
 734 
 735         if (p->p_flag & SSYS) {
 736                 prunlock(pnp);
 737                 return (EBUSY);
 738         }
 739 
 740         switch (cmd) {
 741 
 742         default:
 743                 error = EINVAL;
 744                 break;
 745 
 746         case PCSTOP:    /* direct process or lwp to stop and wait for stop */


 853                                 klwp_t *lwp = ttolwp(t);
 854 
 855                                 thread_unlock(t);
 856                                 mutex_exit(&p->p_lock);
 857                                 prgregset_32ton(lwp, argp->prgregset,
 858                                     prgregset);
 859                                 prsetprregs(lwp, prgregset, 0);
 860                                 mutex_enter(&p->p_lock);
 861                         }
 862                 }
 863                 break;
 864 
 865         case PCSFPREG:  /* set floating-point registers */
 866                 if (PROCESS_NOT_32BIT(p))
 867                         error = EOVERFLOW;
 868                 else
 869                         error = pr_setfpregs32(pnp, &argp->prfpregset);
 870                 break;
 871 
 872         case PCSXREG:   /* set extra registers */
 873 #if defined(__sparc)
 874                 if (PROCESS_NOT_32BIT(p))
 875                         error = EOVERFLOW;
 876                 else
 877                         error = pr_setxregs(pnp, &argp->prxregset);
 878 #else
 879                 error = EINVAL;
 880 #endif
 881                 break;
 882 
 883         case PCSVADDR:  /* set virtual address at which to resume */
 884                 if (PROCESS_NOT_32BIT(p))
 885                         error = EOVERFLOW;
 886                 else
 887                         error = pr_setvaddr(pnp,
 888                             (caddr_t)(uintptr_t)argp->vaddr);
 889                 break;
 890 
 891         case PCSHOLD:   /* set signal-hold mask */
 892                 pr_sethold(pnp, &argp->sigset);
 893                 break;
 894 
 895         case PCSFAULT:  /* set mask of traced faults */
 896                 pr_setfault(p, &argp->fltset);
 897                 break;
 898 
 899         case PCCSIG:    /* clear current signal */
 900                 error = pr_clearsig(pnp);


 970 #endif
 971 
 972                         error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
 973                         break;
 974                 }
 975 
 976         case PCSPRIV:   /* set the process privileges */
 977                 error = pr_spriv(p, &argp->prpriv, cr);
 978                 break;
 979 
 980         case PCSZONE:   /* set the process's zoneid */
 981                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 982                 break;
 983         }
 984 
 985         if (error)
 986                 prunlock(pnp);
 987         return (error);
 988 }
 989 






 990 #endif  /* _SYSCALL32_IMPL */
 991 
 992 /*
 993  * Return the specific or chosen thread/lwp for a control operation.
 994  * Returns with the thread locked via thread_lock(t).
 995  */
 996 kthread_t *
 997 pr_thread(prnode_t *pnp)
 998 {
 999         prcommon_t *pcp = pnp->pr_common;
1000         kthread_t *t;
1001 
1002         if (pcp->prc_flags & PRC_LWP) {
1003                 t = pcp->prc_thread;
1004                 ASSERT(t != NULL);
1005                 thread_lock(t);
1006         } else {
1007                 proc_t *p = pcp->prc_proc;
1008                 t = prchoose(p);        /* returns locked thread */
1009                 ASSERT(t != NULL);


1693 
1694         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1695                 thread_unlock(t);
1696                 return (EBUSY);
1697         }
1698         if (!prhasfp()) {
1699                 thread_unlock(t);
1700                 return (EINVAL);        /* No FP support */
1701         }
1702 
1703         /* drop p_lock while touching the lwp's stack */
1704         thread_unlock(t);
1705         mutex_exit(&p->p_lock);
1706         prsetprfpregs32(ttolwp(t), prfpregset);
1707         mutex_enter(&p->p_lock);
1708 
1709         return (0);
1710 }
1711 #endif  /* _SYSCALL32_IMPL */
1712 
1713 #if defined(__sparc)
1714 /* ARGSUSED */
1715 static int
1716 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1717 {

1718         proc_t *p = pnp->pr_common->prc_proc;
1719         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1720 
1721         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1722                 thread_unlock(t);
1723                 return (EBUSY);
1724         }
1725         thread_unlock(t);
1726 
1727         if (!prhasx(p))
1728                 return (EINVAL);        /* No extra register support */
1729 
1730         /* drop p_lock while touching the lwp's stack */
1731         mutex_exit(&p->p_lock);
1732         prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1733         mutex_enter(&p->p_lock);
1734 
1735         return (0);
1736 }
1737 
1738 static int
1739 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1740 {
1741         proc_t *p = pnp->pr_common->prc_proc;
1742         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1743 
1744         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1745                 thread_unlock(t);
1746                 return (EBUSY);
1747         }
1748         thread_unlock(t);
1749 
1750         /* drop p_lock while touching the lwp's stack */
1751         mutex_exit(&p->p_lock);
1752         prsetasregs(ttolwp(t), asrset);
1753         mutex_enter(&p->p_lock);
1754 
1755         return (0);
1756 }
1757 #endif
1758 
1759 static int
1760 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1761 {
1762         proc_t *p = pnp->pr_common->prc_proc;
1763         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1764 
1765         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1766                 thread_unlock(t);
1767                 return (EBUSY);
1768         }
1769 
1770         /* drop p_lock while touching the lwp's stack */
1771         thread_unlock(t);
1772         mutex_exit(&p->p_lock);
1773         prsvaddr(ttolwp(t), vaddr);
1774         mutex_enter(&p->p_lock);
1775 
1776         return (0);
1777 }
1778 
1779 void




   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015, Joyent, Inc.
  29  * Copyright 2023 Oxide Computer Company
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/uio.h>
  34 #include <sys/param.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/cred.h>
  37 #include <sys/policy.h>
  38 #include <sys/debug.h>
  39 #include <sys/errno.h>
  40 #include <sys/file.h>
  41 #include <sys/inline.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/brand.h>
  45 #include <sys/regset.h>
  46 #include <sys/sysmacros.h>
  47 #include <sys/systm.h>
  48 #include <sys/vfs.h>
  49 #include <sys/vnode.h>
  50 #include <sys/signal.h>
  51 #include <sys/auxv.h>
  52 #include <sys/user.h>
  53 #include <sys/class.h>
  54 #include <sys/fault.h>
  55 #include <sys/syscall.h>
  56 #include <sys/procfs.h>
  57 #include <sys/zone.h>
  58 #include <sys/copyops.h>
  59 #include <sys/schedctl.h>
  60 #include <vm/as.h>
  61 #include <vm/seg.h>
  62 #include <fs/proc/prdata.h>
  63 #include <sys/contract/process_impl.h>
  64 #include <sys/stdalign.h>
  65 
  66 static  void    pr_settrace(proc_t *, sigset_t *);
  67 static  int     pr_setfpregs(prnode_t *, prfpregset_t *);

  68 static  int     pr_setxregs(prnode_t *, prxregset_t *);


  69 static  int     pr_setvaddr(prnode_t *, caddr_t);
  70 static  int     pr_clearsig(prnode_t *);
  71 static  int     pr_clearflt(prnode_t *);
  72 static  int     pr_watch(prnode_t *, prwatch_t *, int *);
  73 static  int     pr_agent(prnode_t *, prgregset_t, int *);
  74 static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
  75 static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
  76 static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
  77 static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
  78 static  void    pauselwps(proc_t *);
  79 static  void    unpauselwps(proc_t *);
  80 
  81 /*
  82  * This union represents the size of commands that are generally fixed size in
  83  * /proc. There are some commands that are variable size because the actual data
  84  * is structured. Of things in the latter category, some of these are the same
  85  * across all architectures (e.g. prcred_t, prpriv_t) and some vary and are
  86  * opaque (e.g. the prxregset_t).
 *
 * pr_control() interprets the payload that follows a command word through
 * this union. PCSXREG has no member here: pr_control() casts the payload
 * directly to a prxregset_t pointer instead.
  87  */
  88 typedef union {
  89         long            sig;            /* PCKILL, PCUNKILL */
  90         long            nice;           /* PCNICE */
  91         long            timeo;          /* PCTWSTOP */
  92         ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
  93         caddr_t         vaddr;          /* PCSVADDR */
  94         siginfo_t       siginfo;        /* PCSSIG */
  95         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
  96         fltset_t        fltset;         /* PCSFAULT */
  97         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
  98         prgregset_t     prgregset;      /* PCSREG, PCAGENT */
  99         prfpregset_t    prfpregset;     /* PCSFPREG */




 100         prwatch_t       prwatch;        /* PCWATCH */
 101         priovec_t       priovec;        /* PCREAD, PCWRITE */
 102         prcred_t        prcred;         /* PCSCRED */
 103         prpriv_t        prpriv;         /* PCSPRIV */
 104         long            przoneid;       /* PCSZONE */
 105 } arg_t;
 106 
 107 static boolean_t
 108 prwritectl_pcscredx_sizef(const void *datap, size_t *sizep)
 109 {
 110         const prcred_t *cred = datap;
 111 
 112         if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
 113                 return (B_FALSE);
 114         }
 115 
 116         if (cred->pr_ngroups == 0) {
 117                 *sizep = 0;
 118         } else {
 119                 *sizep = (cred->pr_ngroups - 1) * sizeof (gid_t);
 120         }
 121         return (B_TRUE);
 122 }
 123 
 124 static boolean_t
 125 prwritectl_pcspriv_sizef(const void *datap, size_t *sizep)
 126 {
 127         const prpriv_t *priv = datap;
 128         *sizep = priv_prgetprivsize(priv) - sizeof (prpriv_t);
 129         return (B_TRUE);
 130 }
 131 
 132 /*
 133  * This structure represents a single /proc write command that we support and
 134  * metadata about how to ensure we have sufficient data for it. To determine the
 135  * data that we need to read, this combines information from three different
 136  * sources for a given named command in 'pcs_cmd'. The main goal is to first
 137  * make sure we have the right minimum amount of information so we can read and
 138  * validate the data around variable length structures.
 139  *
 140  *   o Most commands have a fixed static size. This is represented in the
 141  *     pcs_size member. This also is used to represent the base structure size
 142  *     in the case of entries like PCSCREDX.
 143  *
 144  *   o Other commands have an unknown minimum size to determine how much data
 145  *     there is and they use the pcs_minf() function to determine the right
 146  *     value. This is often unknown at compile time because it is say a
 147  *     machdep or ISA based feature (ala PCSXREGS) and we'd rather not #ifdef
 148  *     this code to death. This may be skipped and is for most things. The value
 149  *     it returns is added to the static value.
 150  *
 151  *   o The final piece is the pcs_sizef() function pointer which determines the
 152  *     total required size for this. It is given a pointer that has at least
 153  *     pcs_size and pcs_minf() bytes. This is used to determine the total
 154  *     expected size of the structure. Callers must not dereference data beyond
 155  *     what they've indicated previously. This should only return extra bytes
 156  *     that are required beyond what was already indicated between the two
 157  *     functions.
 158  *
 159  * In all cases, the core prwritectl() logic will determine if there is
 160  * sufficient data at each step along the way for each of these to proceed.
 161  */
 162 typedef struct proc_control_info {
 163         long    pcs_cmd;        /* command word value this entry matches */
 164         size_t  pcs_size;       /* fixed payload bytes after the command */
 165         boolean_t (*pcs_minf)(size_t *);        /* optional; may be NULL */
 166         boolean_t (*pcs_sizef)(const void *, size_t *); /* optional */
 167 } proc_control_info_t;
 168 
 169 static const proc_control_info_t proc_ctl_info[] = {
 170         { PCNULL, 0, NULL, NULL },
 171         { PCSTOP, 0, NULL, NULL },
 172         { PCDSTOP, 0, NULL, NULL },
 173         { PCWSTOP, 0, NULL, NULL },
 174         { PCCSIG, 0, NULL, NULL },
 175         { PCCFAULT, 0, NULL, NULL },
 176         { PCSSIG, sizeof (siginfo_t), NULL, NULL },
 177         { PCTWSTOP, sizeof (long), NULL, NULL },
 178         { PCKILL, sizeof (long), NULL, NULL },
 179         { PCUNKILL, sizeof (long), NULL, NULL },
 180         { PCNICE, sizeof (long), NULL, NULL },
 181         { PCRUN, sizeof (ulong_t), NULL, NULL },
 182         { PCSET, sizeof (ulong_t), NULL, NULL },
 183         { PCUNSET, sizeof (ulong_t), NULL, NULL },
 184         { PCSTRACE, sizeof (sigset_t), NULL, NULL },
 185         { PCSHOLD, sizeof (sigset_t), NULL, NULL },
 186         { PCSFAULT, sizeof (fltset_t), NULL, NULL },
 187         { PCSENTRY, sizeof (sysset_t), NULL, NULL },
 188         { PCSEXIT, sizeof (sysset_t), NULL, NULL },
 189         { PCSREG, sizeof (prgregset_t), NULL, NULL },
 190         { PCAGENT, sizeof (prgregset_t), NULL, NULL },
 191         { PCSFPREG, sizeof (prfpregset_t), NULL, NULL },
 192         { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
 193         { PCWATCH, sizeof (prwatch_t), NULL },
 194         { PCREAD, sizeof (priovec_t), NULL, NULL },
 195         { PCWRITE, sizeof (priovec_t), NULL, NULL },
 196         { PCSCRED, sizeof (prcred_t), NULL, NULL },
 197         { PCSCREDX, sizeof (prcred_t), NULL, prwritectl_pcscredx_sizef },
 198         { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
 199         { PCSZONE, sizeof (long), NULL },
 200 };
 201 
 202 /*
 203  * We need a default buffer that we're going to allocate when we need memory to
 204  * read control operations. This is on average large enough to hold multiple
 205  * control operations. We leave this as a smaller value on debug builds just
 206  * to exercise our reallocation logic.
 207  */
 208 #ifdef  DEBUG
 209 #define PROC_CTL_DEFSIZE        32
 210 #else
 211 #define PROC_CTL_DEFSIZE        1024
 212 #endif
 213 
 214 /*
 215  * This structure is used to track all of the information that we have around a
 216  * prwritectl call. This is used to reduce function parameters and make state
 217  * clear.
 218  */
 219 typedef struct {
 220         void    *prwc_buf;      /* data read from the uio so far */
 221         size_t  prwc_buflen;    /* allocated size of prwc_buf and prwc_buf32 */
 222         size_t  prwc_curvalid;  /* valid bytes at the start of prwc_buf */
 223         uio_t   *prwc_uiop;     /* where the control data comes from */
 224         prnode_t *prwc_pnp;     /* node that is locked and controlled */
 225         boolean_t prwc_locked;  /* B_TRUE while prwc_pnp is prlock()ed */
 226         boolean_t prwc_need32;  /* command words are int32_t sized */
 227         void    *prwc_buf32;    /* copy target for misaligned ILP32 data */
 228 } prwritectl_t;
 229 
 230 /*
 231  * Attempt to read in at least needed data. If we need to read in data, then we
 232  * will try to fill in as much data as required.
 233  */
 234 static int
 235 prwritectl_readin(prwritectl_t *prwc, size_t needed)
 236 {
 237         int ret;
 238         size_t toread;
 239         void *start;
 240 
 241         /*
 242          * If we have as much data as we need then we're good to go.

 243          */
 244         if (prwc->prwc_curvalid > needed) {
 245                 ASSERT3U(prwc->prwc_buflen, >=, prwc->prwc_curvalid);
 246                 ASSERT3U(prwc->prwc_buflen, >=, needed);
 247                 return (0);
 248         }


 249 
 250         /*
 251          * We don't have all of our data. We must make sure of several things:
 252          *
 253          *   1. That there actually is enough data in the uio_t for what we
 254          *      need, considering what we've already read.
 255          *   2. If the process is locked, at this point, we want to unlock it
 256          *      before we deal with any I/O or memory allocation. Otherwise we
 257          *      can wreak havoc with p_lock / paging.
 258          *   3. We need to make sure that our buffer is large enough to actually
 259          *      fit it all.
 260          *   4. Only at that point can we actually perform the read.
 261          */
 262         if (needed - prwc->prwc_curvalid > prwc->prwc_uiop->uio_resid) {
 263                 return (EINVAL);

 264         }
 265 
 266         if (prwc->prwc_locked) {
 267                 prunlock(prwc->prwc_pnp);
 268                 prwc->prwc_locked = B_FALSE;
 269         }
 270 
 271         if (needed > prwc->prwc_buflen) {
 272                 size_t new_len = P2ROUNDUP(needed, PROC_CTL_DEFSIZE);
 273                 prwc->prwc_buf = kmem_rezalloc(prwc->prwc_buf,
 274                     prwc->prwc_buflen, new_len, KM_SLEEP);
 275                 if (prwc->prwc_need32) {
 276                         prwc->prwc_buf32 = kmem_rezalloc(prwc->prwc_buf32,
 277                             prwc->prwc_buflen, new_len, KM_SLEEP);
 278                 }
 279                 prwc->prwc_buflen = new_len;
 280         }
 281 
 282         toread = MIN(prwc->prwc_buflen - prwc->prwc_curvalid,
 283             prwc->prwc_uiop->uio_resid);
 284         ASSERT3U(toread, >=, needed - prwc->prwc_curvalid);
 285         start = (void *)((uintptr_t)prwc->prwc_buf + prwc->prwc_curvalid);
 286         if ((ret = uiomove(start, toread, UIO_WRITE, prwc->prwc_uiop)) != 0) {
 287                 return (ret);
 288         }
 289 
 290         prwc->prwc_curvalid += toread;
 291         return (0);

 292 }
 293 
 294 static const proc_control_info_t *
 295 prwritectl_cmd_identify(const prwritectl_t *prwc,
 296     const proc_control_info_t *info, size_t ninfo, size_t cmdsize)
 297 {
 298         long cmd;
 299 
 300         ASSERT(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
 301         if (cmdsize == 4) {
 302                 cmd = (long)*(int32_t *)prwc->prwc_buf;
 303         } else {
 304                 cmd = *(long *)prwc->prwc_buf;
 305         }
 306 
 307 
 308         for (size_t i = 0; i < ninfo; i++) {
 309                 if (info[i].pcs_cmd == cmd) {
 310                         return (&info[i]);
 311                 }
 312         }
 313 
 314         return (NULL);
 315 }
 316 
 317 /*
 318  * Control operations (lots).
 319  *
 320  * Users can submit one or more commands to us in the uio_t. They are required
 321  * to always be complete messages. The first one that fails will cause all
 322  * subsequent things to fail. Processing this can be a little tricky as the
 323  * actual data size that may be required is variable, not all structures are
 324  * fixed sizes and some vary based on the instruction set (e.g. x86 vs.
 325  * something else).
 326  *
 327  * The way that we handle process locking deserves some consideration. Prior to
 328  * the colonization of prwritectl and the support for dynamic sizing of data,
 329  * the logic would try to read in a large chunk of data and keep a process
 330  * locked throughout that period and then unlock it before reading more data. As
 331  * such, we mimic that logically and basically lock it before executing the
 332  * first (or any subsequent) command and then only unlock it either when we're
 333  * done entirely or we need to allocate memory or read from the process.
 334  *
 335  * This function is a common implementation for both the ILP32 and LP64 entry
 336  * points as they are mostly the same except for the sizing and control function
 337  * we call.
 *
 * Arguments:
 *   vp           vnode of the /proc file being written; VTOP(vp) is the
 *                prnode_t that gets locked and handed to pr_controlf.
 *   uiop         the control data being written.
 *   cr           credentials, passed through to pr_controlf.
 *   proc_info    table of known commands, with ninfo entries.
 *   cmdsize      size of a command word; must be sizeof (int32_t) or
 *                sizeof (long).
 *   pr_controlf  function that executes one command on its payload.
 *
 * Returns 0 once all data has been consumed. Otherwise returns the first
 * failure: EINVAL for an unknown command or a sizing callback that rejects
 * the data, the error from reading or from prlock(), or a positive error from
 * pr_controlf. A negative return from pr_controlf does not stop processing.
 338  */
 339 int
 340 prwritectl_common(vnode_t *vp, uio_t *uiop, cred_t *cr,
 341     const proc_control_info_t *proc_info, size_t ninfo, size_t cmdsize,
 342     int (*pr_controlf)(long, void *, prnode_t *, cred_t *))
 343 {
 344         int ret;
 345         prwritectl_t prwc;








 346 
 347         VERIFY(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
 348 
 349         bzero(&prwc, sizeof (prwc));
 350         prwc.prwc_pnp = VTOP(vp);
 351         prwc.prwc_uiop = uiop;
 352         prwc.prwc_need32 = cmdsize == sizeof (int32_t);
 353 
 354         /*
 355          * We may have multiple commands to read and want to try to minimize the
 356          * amount of reading that we do. Our callers expect us to have a
 357          * contiguous buffer for a command's actual implementation. However, we
 358          * must have at least a single long worth of data, otherwise it's not
 359          * worth continuing.
 360          */
 361         while (uiop->uio_resid > 0 || prwc.prwc_curvalid > 0) {
 362                 const proc_control_info_t *proc_cmd;
 363                 void *data;
 364 
 365                 /*
 366                  * Check if we have enough data to identify a command. If not,
 367                  * we read as much as we can in one gulp.
 368                  */
 369                 if ((ret = prwritectl_readin(&prwc, cmdsize)) != 0) {
 370                         goto out;
 371                 }







 372 
 373                 /*
 374                  * Identify the command and figure out how much data we
 375                  * should have read in the kernel. Some commands have a variable
 376                  * length and we need to make sure the minimum is met before
 377                  * asking how much there is in general. Most things know what
 378                  * the minimum length is and this pcs_minf() is not implemented.
 379                  * However things that are ISA-specific require us to ask that
 380                  * first.
 381                  *
 382                  * We also must be aware that there may not actually be enough
 383                  * data present in the uio_t.
 384                  */
 385                 if ((proc_cmd = prwritectl_cmd_identify(&prwc, proc_info,
 386                     ninfo, cmdsize)) == NULL) {
 387                         ret = EINVAL;
 388                         goto out;
 389                 }
 390 
 391                 size_t needed_data = cmdsize + proc_cmd->pcs_size;
 392                 if (proc_cmd->pcs_minf != NULL) {
 393                         size_t min;
 394 
 395                         if (!proc_cmd->pcs_minf(&min)) {
 396                                 ret = EINVAL;
 397                                 goto out;
 398                         }
 399 
 400                         needed_data += min;
 401                 }
 402 
 403                 if (proc_cmd->pcs_sizef != NULL) {
 404                         size_t extra;
 405 
 406                         /*
 407                          * Make sure we have the minimum amount of data that
 408                          * they asked us to between the static and minf
 409                          * function.
 410                          */
 411                         if ((ret = prwritectl_readin(&prwc, needed_data)) !=
 412                             0) {
 413                                 goto out;

 414                         }
 415 
 416                         VERIFY3U(prwc.prwc_curvalid, >, cmdsize);
 417                         data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
 418                         if (!proc_cmd->pcs_sizef(data, &extra)) {
 419                                 ret = EINVAL;
 420                                 goto out;
 421                         }


 422 
 423                         needed_data += extra;


 424                 }
 425 
 426                 /*
 427                  * Now that we know how much data we're supposed to have,
 428                  * finally ensure we have the total amount we need.
 429                  */
 430                 if ((ret = prwritectl_readin(&prwc, needed_data)) != 0) {
 431                         goto out;
 432                 }
 433 
 434                 /*
 435                  * /proc has traditionally assumed control writes come in
 436                  * multiples of a long. This is 4 bytes for ILP32 and 8 bytes
 437                  * for LP64. When calculating the required size for a structure,
 438                  * it would always round that up to the next long. However, the
 439                  * exact combination of circumstances changes with the
 440                  * introduction of the 64-bit kernel. For 64-bit processes we
 441                  * round up when the current command we're processing isn't the
 442                  * last one.
 443                  *
 444                  * Because of our tracking structures and caching we need to
 445                  * look beyond the uio_t to make this determination. In
 446                  * particular, the uio_t can have a zero resid, but we may still
 447                  * have additional data to read as indicated by prwc_curvalid
 448                  * exceeding the current command size. In the end, we must check
 449                  * both of these cases.
 450                  */
 451                 if ((needed_data % cmdsize) != 0) {
 452                         if (cmdsize == sizeof (int32_t) ||
 453                             prwc.prwc_curvalid > needed_data ||
 454                             prwc.prwc_uiop->uio_resid > 0) {
 455                                 needed_data = P2ROUNDUP(needed_data,
 456                                     cmdsize);
 457                                 if ((ret = prwritectl_readin(&prwc,
 458                                     needed_data)) != 0) {
 459                                         goto out;
 460                                 }
 461                         }
 462                 }
 463 
 464                 if (!prwc.prwc_locked) {
 465                         ret = prlock(prwc.prwc_pnp, ZNO);
 466                         if (ret != 0) {
 467                                 goto out;
 468                         }
 469                         prwc.prwc_locked = B_TRUE;
 470                 }
 471 
 472                 /*
 473                  * Run our actual command. When there is an error, then the
 474                  * underlying pr_control call will have unlocked the prnode_t
 475                  * on our behalf. pr_control can return -1, which is a special
 476                  * error indicating a timeout occurred. In such a case the node
 477                  * is unlocked; however, we are supposed to continue
 478                  * processing commands regardless.
 479                  *
 480                  * Finally, we must deal with one actual wrinkle. The LP64
 481                  * based logic always guarantees that we have data that is
 482                  * 8-byte aligned. However, the ILP32 logic is 4-byte aligned
 483                  * and the rest of the /proc code assumes it can always
 484                  * dereference it. If we're not aligned, we have to bcopy it to
 485                  * a temporary buffer.
 486                  */
 487                 data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
 488 #ifdef  DEBUG
 489                 if (cmdsize == sizeof (long)) {
 490                         VERIFY0((uintptr_t)data % alignof (long));
 491                 }
 492 #endif
 493                 if (prwc.prwc_need32 && ((uintptr_t)data % alignof (long)) !=
 494                     0 && needed_data > cmdsize) {
 495                         bcopy(data, prwc.prwc_buf32, needed_data - cmdsize);
 496                         data = prwc.prwc_buf32;
 497                 }
 498                 ret = pr_controlf(proc_cmd->pcs_cmd, data, prwc.prwc_pnp, cr);
 499                 if (ret != 0) {
 500                         prwc.prwc_locked = B_FALSE;
 501                         if (ret > 0) {
 502                                 goto out;
 503                         }
 504                 }
 505 
 506                 /*
 507                  * Finally, now that we have processed this command, we need to
 508                  * move on. To make our life simple, we basically shift all the
 509                  * data in our buffer over to indicate it's been consumed. While
 510                  * a little wasteful, this simplifies buffer management and
 511                  * guarantees that command processing uses a semi-sanitized
 512                  * state. Visually, this is the following transformation:
 513                  *
 514                  *  0                   20              prwc.prwc_curvalid
 515                  *   +------------------+----------------+
 516                  *   |   needed_data    | remaining_data |
 517                  *   +------------------+----------------+
 518                  *
 519                  * In the above example we are shifting all the data over by 20,
 520                  * so remaining data starts at 0. This leaves us needed_data
 521                  * bytes to clean up from what was valid.
 522                  */
                /* Scrub this command's payload from the ILP32 copy buffer. */
 523                 if (prwc.prwc_buf32 != NULL) {
 524                         bzero(prwc.prwc_buf32, needed_data - cmdsize);
 525                 }
 526 
 527                 if (prwc.prwc_curvalid > needed_data) {
 528                         size_t save_size = prwc.prwc_curvalid - needed_data;
 529                         void *first_save = (void *)((uintptr_t)prwc.prwc_buf +
 530                             needed_data);
 531                         memmove(prwc.prwc_buf, first_save, save_size);
 532                         void *first_zero = (void *)((uintptr_t)prwc.prwc_buf +
 533                             save_size);
 534                         bzero(first_zero, needed_data);
 535                 } else {
 536                         bzero(prwc.prwc_buf, prwc.prwc_curvalid);
 537                 }
 538                 prwc.prwc_curvalid -= needed_data;
 539         }
 540 
 541         /*
 542          * We've managed to successfully process everything. We can actually say
 543          * this was successful now.
 544          */
 545         ret = 0;
 546 
        /* Common exit: drop the lock if we still hold it, free both buffers. */
 547 out:
 548         if (prwc.prwc_locked) {
 549                 prunlock(prwc.prwc_pnp);
 550                 prwc.prwc_locked = B_FALSE;
 551         }
 552 
 553         if (prwc.prwc_buf != NULL) {
 554                 kmem_free(prwc.prwc_buf, prwc.prwc_buflen);
 555         }
 556 
 557         if (prwc.prwc_buf32 != NULL) {
 558                 VERIFY(prwc.prwc_need32);
 559                 kmem_free(prwc.prwc_buf32, prwc.prwc_buflen);
 560         }
 561 
 562         return (ret);
 563 }
 564 
 565 static int
 566 pr_control(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
 567 {
 568         prcommon_t *pcp;
 569         proc_t *p;
 570         int unlocked;
 571         int error = 0;
 572         arg_t *argp = generic;
 573 
 574         if (cmd == PCNULL)
 575                 return (0);
 576 
 577         pcp = pnp->pr_common;
 578         p = pcp->prc_proc;
 579         ASSERT(p != NULL);
 580 
 581         /* System processes defy control. */
 582         if (p->p_flag & SSYS) {
 583                 prunlock(pnp);
 584                 return (EBUSY);
 585         }
 586 
 587         switch (cmd) {
 588 
 589         default:
 590                 error = EINVAL;
 591                 break;
 592 


 681                 {
 682                         kthread_t *t = pr_thread(pnp);
 683 
 684                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 685                                 thread_unlock(t);
 686                                 error = EBUSY;
 687                         } else {
 688                                 thread_unlock(t);
 689                                 mutex_exit(&p->p_lock);
 690                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 691                                 mutex_enter(&p->p_lock);
 692                         }
 693                         break;
 694                 }
 695 
 696         case PCSFPREG:  /* set floating-point registers */
 697                 error = pr_setfpregs(pnp, &argp->prfpregset);
 698                 break;
 699 
 700         case PCSXREG:   /* set extra registers */
 701                 error = pr_setxregs(pnp, (prxregset_t *)argp);




 702                 break;
 703 






 704         case PCSVADDR:  /* set virtual address at which to resume */
 705                 error = pr_setvaddr(pnp, argp->vaddr);
 706                 break;
 707 
 708         case PCSHOLD:   /* set signal-hold mask */
 709                 pr_sethold(pnp, &argp->sigset);
 710                 break;
 711 
 712         case PCSFAULT:  /* set mask of traced faults */
 713                 pr_setfault(p, &argp->fltset);
 714                 break;
 715 
 716         case PCCSIG:    /* clear current signal */
 717                 error = pr_clearsig(pnp);
 718                 break;
 719 
 720         case PCCFAULT:  /* clear current fault */
 721                 error = pr_clearflt(pnp);
 722                 break;
 723 


 742                 break;
 743 
 744         case PCSCRED:   /* set the process credentials */
 745         case PCSCREDX:
 746                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 747                 break;
 748 
 749         case PCSPRIV:   /* set the process privileges */
 750                 error = pr_spriv(p, &argp->prpriv, cr);
 751                 break;
 752         case PCSZONE:   /* set the process's zoneid credentials */
 753                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 754                 break;
 755         }
 756 
 757         if (error)
 758                 prunlock(pnp);
 759         return (error);
 760 }
 761 
 762 int
 763 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
 764 {
 765         return (prwritectl_common(vp, uiop, cr, proc_ctl_info,
 766             ARRAY_SIZE(proc_ctl_info), sizeof (long), pr_control));
 767 }
 768 
 769 #ifdef _SYSCALL32_IMPL
 770 
/*
 * ILP32 layout of the payload that follows a 32-bit command word.
 * pr_control32() interprets its generic argument through this union. As with
 * the native union there is no member for PCSXREG; pr_control32() casts the
 * payload to a prxregset_t pointer.
 */
 771 typedef union {
 772         int32_t         sig;            /* PCKILL, PCUNKILL */
 773         int32_t         nice;           /* PCNICE */
 774         int32_t         timeo;          /* PCTWSTOP */
 775         uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
 776         caddr32_t       vaddr;          /* PCSVADDR */
 777         siginfo32_t     siginfo;        /* PCSSIG */
 778         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
 779         fltset_t        fltset;         /* PCSFAULT */
 780         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
 781         prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
 782         prfpregset32_t  prfpregset;     /* PCSFPREG */



 783         prwatch32_t     prwatch;        /* PCWATCH */
 784         priovec32_t     priovec;        /* PCREAD, PCWRITE */
 785         prcred32_t      prcred;         /* PCSCRED */
 786         prpriv_t        prpriv;         /* PCSPRIV */
 787         int32_t         przoneid;       /* PCSZONE */
 788 } arg32_t;
 789 

 790 static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 791 
 792 static boolean_t
 793 prwritectl_pcscredx32_sizef(const void *datap, size_t *sizep)





 794 {
 795         const prcred32_t *cred = datap;


 796 
 797         if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
 798                 return (B_FALSE);




















































































 799         }
 800 
 801         if (cred->pr_ngroups == 0) {
 802                 *sizep = 0;
 803         } else {
 804                 *sizep = (cred->pr_ngroups - 1) * sizeof (gid32_t);
 805         }
 806         return (B_TRUE);



 807 }
 808 
 809 /*
 810  * When dealing with ILP32 code, we are not at a point where we can assume
 811  * 64-bit aligned data. Any functions that are operating here must be aware of
 812  * that.
 813  */
 814 static const proc_control_info_t proc_ctl_info32[] = {
 815         { PCNULL, 0, NULL, NULL },
 816         { PCSTOP, 0, NULL, NULL },
 817         { PCDSTOP, 0, NULL, NULL },
 818         { PCWSTOP, 0, NULL, NULL },
 819         { PCCSIG, 0, NULL, NULL },
 820         { PCCFAULT, 0, NULL, NULL },
 821         { PCSSIG, sizeof (siginfo32_t), NULL, NULL },
 822         { PCTWSTOP, sizeof (int32_t), NULL, NULL },
 823         { PCKILL, sizeof (int32_t), NULL, NULL },
 824         { PCUNKILL, sizeof (int32_t), NULL, NULL },
 825         { PCNICE, sizeof (int32_t), NULL, NULL },
 826         { PCRUN, sizeof (uint32_t), NULL, NULL },
 827         { PCSET, sizeof (uint32_t), NULL, NULL },
 828         { PCUNSET, sizeof (uint32_t), NULL, NULL },
 829         { PCSVADDR, sizeof (caddr32_t), NULL, NULL },
 830         { PCSTRACE, sizeof (sigset_t), NULL, NULL },
 831         { PCSHOLD, sizeof (sigset_t), NULL, NULL },
 832         { PCSFAULT, sizeof (fltset_t), NULL, NULL },
 833         { PCSENTRY, sizeof (sysset_t), NULL, NULL },
 834         { PCSEXIT, sizeof (sysset_t), NULL, NULL },
 835         { PCSREG, sizeof (prgregset32_t), NULL, NULL },
 836         { PCAGENT, sizeof (prgregset32_t), NULL, NULL },
 837         { PCSFPREG, sizeof (prfpregset32_t), NULL, NULL },
 838         { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
 839         { PCWATCH, sizeof (prwatch32_t), NULL },
 840         { PCREAD, sizeof (priovec32_t), NULL, NULL },
 841         { PCWRITE, sizeof (priovec32_t), NULL, NULL },
 842         { PCSCRED, sizeof (prcred32_t), NULL, NULL },
 843         { PCSCREDX, sizeof (prcred32_t), NULL, prwritectl_pcscredx32_sizef },
 844         { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
 845         { PCSZONE, sizeof (long), NULL },
 846 };
 847 




































































 848 static int
 849 pr_control32(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
 850 {
 851         prcommon_t *pcp;
 852         proc_t *p;
 853         int unlocked;
 854         int error = 0;
 855         arg32_t *argp = generic;
 856 
 857         if (cmd == PCNULL)
 858                 return (0);
 859 
 860         pcp = pnp->pr_common;
 861         p = pcp->prc_proc;
 862         ASSERT(p != NULL);
 863 
 864         if (p->p_flag & SSYS) {
 865                 prunlock(pnp);
 866                 return (EBUSY);
 867         }
 868 
 869         switch (cmd) {
 870 
 871         default:
 872                 error = EINVAL;
 873                 break;
 874 
 875         case PCSTOP:    /* direct process or lwp to stop and wait for stop */


 982                                 klwp_t *lwp = ttolwp(t);
 983 
 984                                 thread_unlock(t);
 985                                 mutex_exit(&p->p_lock);
 986                                 prgregset_32ton(lwp, argp->prgregset,
 987                                     prgregset);
 988                                 prsetprregs(lwp, prgregset, 0);
 989                                 mutex_enter(&p->p_lock);
 990                         }
 991                 }
 992                 break;
 993 
 994         case PCSFPREG:  /* set floating-point registers */
 995                 if (PROCESS_NOT_32BIT(p))
 996                         error = EOVERFLOW;
 997                 else
 998                         error = pr_setfpregs32(pnp, &argp->prfpregset);
 999                 break;
1000 
1001         case PCSXREG:   /* set extra registers */

1002                 if (PROCESS_NOT_32BIT(p))
1003                         error = EOVERFLOW;
1004                 else
1005                         error = pr_setxregs(pnp, (prxregset_t *)argp);



1006                 break;
1007 
1008         case PCSVADDR:  /* set virtual address at which to resume */
1009                 if (PROCESS_NOT_32BIT(p))
1010                         error = EOVERFLOW;
1011                 else
1012                         error = pr_setvaddr(pnp,
1013                             (caddr_t)(uintptr_t)argp->vaddr);
1014                 break;
1015 
1016         case PCSHOLD:   /* set signal-hold mask */
1017                 pr_sethold(pnp, &argp->sigset);
1018                 break;
1019 
1020         case PCSFAULT:  /* set mask of traced faults */
1021                 pr_setfault(p, &argp->fltset);
1022                 break;
1023 
1024         case PCCSIG:    /* clear current signal */
1025                 error = pr_clearsig(pnp);


1095 #endif
1096 
1097                         error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
1098                         break;
1099                 }
1100 
1101         case PCSPRIV:   /* set the process privileges */
1102                 error = pr_spriv(p, &argp->prpriv, cr);
1103                 break;
1104 
1105         case PCSZONE:   /* set the process's zoneid */
1106                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
1107                 break;
1108         }
1109 
1110         if (error)
1111                 prunlock(pnp);
1112         return (error);
1113 }
1114 
1115 int
1116 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
1117 {
1118         return (prwritectl_common(vp, uiop, cr, proc_ctl_info32,
1119             ARRAY_SIZE(proc_ctl_info32), sizeof (int32_t), pr_control32));
1120 }
1121 #endif  /* _SYSCALL32_IMPL */
1122 
1123 /*
1124  * Return the specific or chosen thread/lwp for a control operation.
1125  * Returns with the thread locked via thread_lock(t).
1126  */
1127 kthread_t *
1128 pr_thread(prnode_t *pnp)
1129 {
1130         prcommon_t *pcp = pnp->pr_common;
1131         kthread_t *t;
1132 
1133         if (pcp->prc_flags & PRC_LWP) {
1134                 t = pcp->prc_thread;
1135                 ASSERT(t != NULL);
1136                 thread_lock(t);
1137         } else {
1138                 proc_t *p = pcp->prc_proc;
1139                 t = prchoose(p);        /* returns locked thread */
1140                 ASSERT(t != NULL);


1824 
1825         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1826                 thread_unlock(t);
1827                 return (EBUSY);
1828         }
1829         if (!prhasfp()) {
1830                 thread_unlock(t);
1831                 return (EINVAL);        /* No FP support */
1832         }
1833 
1834         /* drop p_lock while touching the lwp's stack */
1835         thread_unlock(t);
1836         mutex_exit(&p->p_lock);
1837         prsetprfpregs32(ttolwp(t), prfpregset);
1838         mutex_enter(&p->p_lock);
1839 
1840         return (0);
1841 }
1842 #endif  /* _SYSCALL32_IMPL */
1843 

1844 /* ARGSUSED */
1845 static int
1846 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1847 {
1848         int error;
1849         proc_t *p = pnp->pr_common->prc_proc;
1850         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1851 
1852         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1853                 thread_unlock(t);
1854                 return (EBUSY);
1855         }
1856         thread_unlock(t);
1857 
1858         if (!prhasx(p))
1859                 return (EINVAL);        /* No extra register support */
1860 
1861         /* drop p_lock while touching the lwp's stack */
1862         mutex_exit(&p->p_lock);
1863         error = prsetprxregs(ttolwp(t), prxregset);
1864         mutex_enter(&p->p_lock);
1865 
1866         return (error);
1867 }
1868 
1869 static int





















1870 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1871 {
1872         proc_t *p = pnp->pr_common->prc_proc;
1873         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1874 
1875         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1876                 thread_unlock(t);
1877                 return (EBUSY);
1878         }
1879 
1880         /* drop p_lock while touching the lwp's stack */
1881         thread_unlock(t);
1882         mutex_exit(&p->p_lock);
1883         prsvaddr(ttolwp(t), vaddr);
1884         mutex_enter(&p->p_lock);
1885 
1886         return (0);
1887 }
1888 
1889 void