1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2015, Joyent, Inc.
29 * Copyright 2023 Oxide Computer Company
30 */
31
32 #include <sys/types.h>
33 #include <sys/uio.h>
34 #include <sys/param.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cred.h>
37 #include <sys/policy.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/file.h>
41 #include <sys/inline.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/brand.h>
45 #include <sys/regset.h>
46 #include <sys/sysmacros.h>
47 #include <sys/systm.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/signal.h>
51 #include <sys/auxv.h>
52 #include <sys/user.h>
53 #include <sys/class.h>
54 #include <sys/fault.h>
55 #include <sys/syscall.h>
56 #include <sys/procfs.h>
57 #include <sys/zone.h>
58 #include <sys/copyops.h>
59 #include <sys/schedctl.h>
60 #include <vm/as.h>
61 #include <vm/seg.h>
62 #include <fs/proc/prdata.h>
63 #include <sys/contract/process_impl.h>
64 #include <sys/stdalign.h>
65
66 static void pr_settrace(proc_t *, sigset_t *);
67 static int pr_setfpregs(prnode_t *, prfpregset_t *);
68 static int pr_setxregs(prnode_t *, prxregset_t *);
69 static int pr_setvaddr(prnode_t *, caddr_t);
70 static int pr_clearsig(prnode_t *);
71 static int pr_clearflt(prnode_t *);
72 static int pr_watch(prnode_t *, prwatch_t *, int *);
73 static int pr_agent(prnode_t *, prgregset_t, int *);
74 static int pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
75 static int pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
76 static int pr_spriv(proc_t *, prpriv_t *, cred_t *);
77 static int pr_szoneid(proc_t *, zoneid_t, cred_t *);
78 static void pauselwps(proc_t *);
79 static void unpauselwps(proc_t *);
80
81 /*
82 * This union represents the size of commands that are generally fixed size in
83 * /proc. There are some commands that are variable size because the actual data
84 * is structured. Of things in the latter category, some of these are the same
85 * across all architectures (e.g. prcred_t, prpriv_t) and some vary and are
86 * opaque (e.g. the prxregset_t).
87 */
88 typedef union {
89 long sig; /* PCKILL, PCUNKILL */
90 long nice; /* PCNICE */
91 long timeo; /* PCTWSTOP */
92 ulong_t flags; /* PCRUN, PCSET, PCUNSET */
93 caddr_t vaddr; /* PCSVADDR */
94 siginfo_t siginfo; /* PCSSIG */
95 sigset_t sigset; /* PCSTRACE, PCSHOLD */
96 fltset_t fltset; /* PCSFAULT */
97 sysset_t sysset; /* PCSENTRY, PCSEXIT */
98 prgregset_t prgregset; /* PCSREG, PCAGENT */
99 prfpregset_t prfpregset; /* PCSFPREG */
100 prwatch_t prwatch; /* PCWATCH */
101 priovec_t priovec; /* PCREAD, PCWRITE */
102 prcred_t prcred; /* PCSCRED */
103 prpriv_t prpriv; /* PCSPRIV */
104 long przoneid; /* PCSZONE */
105 } arg_t;
106
107 static boolean_t
108 prwritectl_pcscredx_sizef(const void *datap, size_t *sizep)
109 {
110 const prcred_t *cred = datap;
111
112 if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
113 return (B_FALSE);
114 }
115
116 if (cred->pr_ngroups == 0) {
117 *sizep = 0;
118 } else {
119 *sizep = (cred->pr_ngroups - 1) * sizeof (gid_t);
120 }
121 return (B_TRUE);
122 }
123
124 static boolean_t
125 prwritectl_pcspriv_sizef(const void *datap, size_t *sizep)
126 {
127 const prpriv_t *priv = datap;
128 *sizep = priv_prgetprivsize(priv) - sizeof (prpriv_t);
129 return (B_TRUE);
130 }
131
132 /*
133 * This structure represents a single /proc write command that we support and
134 * metadata about how to ensure we have sufficient data for it. To determine the
135 * data that we need to read, this combines information from three different
136 * sources for a given named command in 'pcs_cmd'. The main goal is to first
137 * make sure we have the right minimum amount of information so we can read and
138 * validate the data around variable length structures.
139 *
140 * o Most commands have a fixed static size. This is represented in the
141 * pcs_size member. This also is used to represent the base structure size
142 * in the case of entries like PCSCREDX.
143 *
144 * o Other commands have an unknown minimum size to determine how much data
145 * there is and they use the pcs_minf() function to determine the right
146 * value. This is often unknown at compile time because it is say a
147 * machdep or ISA based feature (ala PCSXREGS) and we'd rather not #ifdef
148 * this code to death. This may be skipped and is for most things. The value
149 * it returns is added to the static value.
150 *
151 * o The final piece is the pcs_sizef() function pointer which determines the
152 * total required size for this. It is given a pointer that has at least
153 * pcs_size and pcs_minf() bytes. This is used to determine the total
154 * expected size of the structure. Callers must not dereference data beyond
 *    what they've indicated previously. This should only return extra bytes
156 * that are required beyond what was already indicated between the two
157 * functions.
158 *
 * In all cases, the core prwritectl() logic will determine if there is
 * sufficient data at each step along the way for each of these to proceed.
161 */
typedef struct proc_control_info {
	long		pcs_cmd;	/* PCxxx control message */
	size_t		pcs_size;	/* fixed (or base) payload size */
	/* optional: minimum dynamic size, added to pcs_size */
	boolean_t	(*pcs_minf)(size_t *);
	/* optional: additional bytes required beyond pcs_size + minf */
	boolean_t	(*pcs_sizef)(const void *, size_t *);
} proc_control_info_t;
168
169 static const proc_control_info_t proc_ctl_info[] = {
170 { PCNULL, 0, NULL, NULL },
171 { PCSTOP, 0, NULL, NULL },
172 { PCDSTOP, 0, NULL, NULL },
173 { PCWSTOP, 0, NULL, NULL },
174 { PCCSIG, 0, NULL, NULL },
175 { PCCFAULT, 0, NULL, NULL },
176 { PCSSIG, sizeof (siginfo_t), NULL, NULL },
177 { PCTWSTOP, sizeof (long), NULL, NULL },
178 { PCKILL, sizeof (long), NULL, NULL },
179 { PCUNKILL, sizeof (long), NULL, NULL },
180 { PCNICE, sizeof (long), NULL, NULL },
181 { PCRUN, sizeof (ulong_t), NULL, NULL },
182 { PCSET, sizeof (ulong_t), NULL, NULL },
183 { PCUNSET, sizeof (ulong_t), NULL, NULL },
184 { PCSTRACE, sizeof (sigset_t), NULL, NULL },
185 { PCSHOLD, sizeof (sigset_t), NULL, NULL },
186 { PCSFAULT, sizeof (fltset_t), NULL, NULL },
187 { PCSENTRY, sizeof (sysset_t), NULL, NULL },
188 { PCSEXIT, sizeof (sysset_t), NULL, NULL },
189 { PCSREG, sizeof (prgregset_t), NULL, NULL },
190 { PCAGENT, sizeof (prgregset_t), NULL, NULL },
191 { PCSFPREG, sizeof (prfpregset_t), NULL, NULL },
192 { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
193 { PCWATCH, sizeof (prwatch_t), NULL },
194 { PCREAD, sizeof (priovec_t), NULL, NULL },
195 { PCWRITE, sizeof (priovec_t), NULL, NULL },
196 { PCSCRED, sizeof (prcred_t), NULL, NULL },
197 { PCSCREDX, sizeof (prcred_t), NULL, prwritectl_pcscredx_sizef },
198 { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
199 { PCSZONE, sizeof (long), NULL },
200 };
201
202 /*
203 * We need a default buffer that we're going to allocate when we need memory to
204 * read control operations. This is on average large enough to hold multiple
205 * control operations. We leave this as a smaller value on debug builds just
206 * to exercise our reallocation logic.
207 */
208 #ifdef DEBUG
209 #define PROC_CTL_DEFSIZE 32
210 #else
211 #define PROC_CTL_DEFSIZE 1024
212 #endif
213
214 /*
215 * This structure is used to track all of the information that we have around a
216 * prwritectl call. This is used to reduce function parameters and make state
217 * clear.
218 */
typedef struct {
	void		*prwc_buf;	/* accumulated control-message bytes */
	size_t		prwc_buflen;	/* allocated size of prwc_buf (and buf32) */
	size_t		prwc_curvalid;	/* bytes of prwc_buf currently valid */
	uio_t		*prwc_uiop;	/* user write(2) data being consumed */
	prnode_t	*prwc_pnp;	/* target /proc node */
	boolean_t	prwc_locked;	/* is prwc_pnp currently prlock()ed? */
	boolean_t	prwc_need32;	/* ILP32 caller: bounce buffer may be needed */
	void		*prwc_buf32;	/* aligned bounce buffer for ILP32 payloads */
} prwritectl_t;
229
230 /*
231 * Attempt to read in at least needed data. If we need to read in data, then we
232 * will try to fill in as much data as required.
233 */
234 static int
235 prwritectl_readin(prwritectl_t *prwc, size_t needed)
236 {
237 int ret;
238 size_t toread;
239 void *start;
240
241 /*
242 * If we have as much data as we need then we're good to go.
243 */
244 if (prwc->prwc_curvalid > needed) {
245 ASSERT3U(prwc->prwc_buflen, >=, prwc->prwc_curvalid);
246 ASSERT3U(prwc->prwc_buflen, >=, needed);
247 return (0);
248 }
249
250 /*
251 * We don't have all of our data. We must make sure of several things:
252 *
253 * 1. That there actually is enough data in the uio_t for what we
254 * need, considering what we've already read.
255 * 2. If the process is locked, at this point, we want to unlock it
256 * before we deal with any I/O or memory allocation. Otherwise we
257 * can wreak havoc with p_lock / paging.
258 * 3. We need to make sure that our buffer is large enough to actually
259 * fit it all.
260 * 4. Only at that point can we actually perform the read.
261 */
262 if (needed - prwc->prwc_curvalid > prwc->prwc_uiop->uio_resid) {
263 return (EINVAL);
264 }
265
266 if (prwc->prwc_locked) {
267 prunlock(prwc->prwc_pnp);
268 prwc->prwc_locked = B_FALSE;
269 }
270
271 if (needed > prwc->prwc_buflen) {
272 size_t new_len = P2ROUNDUP(needed, PROC_CTL_DEFSIZE);
273 prwc->prwc_buf = kmem_rezalloc(prwc->prwc_buf,
274 prwc->prwc_buflen, new_len, KM_SLEEP);
275 if (prwc->prwc_need32) {
276 prwc->prwc_buf32 = kmem_rezalloc(prwc->prwc_buf32,
277 prwc->prwc_buflen, new_len, KM_SLEEP);
278 }
279 prwc->prwc_buflen = new_len;
280 }
281
282 toread = MIN(prwc->prwc_buflen - prwc->prwc_curvalid,
283 prwc->prwc_uiop->uio_resid);
284 ASSERT3U(toread, >=, needed - prwc->prwc_curvalid);
285 start = (void *)((uintptr_t)prwc->prwc_buf + prwc->prwc_curvalid);
286 if ((ret = uiomove(start, toread, UIO_WRITE, prwc->prwc_uiop)) != 0) {
287 return (ret);
288 }
289
290 prwc->prwc_curvalid += toread;
291 return (0);
292 }
293
294 static const proc_control_info_t *
295 prwritectl_cmd_identify(const prwritectl_t *prwc,
296 const proc_control_info_t *info, size_t ninfo, size_t cmdsize)
297 {
298 long cmd;
299
300 ASSERT(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));
301 if (cmdsize == 4) {
302 cmd = (long)*(int32_t *)prwc->prwc_buf;
303 } else {
304 cmd = *(long *)prwc->prwc_buf;
305 }
306
307
308 for (size_t i = 0; i < ninfo; i++) {
309 if (info[i].pcs_cmd == cmd) {
310 return (&info[i]);
311 }
312 }
313
314 return (NULL);
315 }
316
317 /*
318 * Control operations (lots).
319 *
320 * Users can submit one or more commands to us in the uio_t. They are required
321 * to always be complete messages. The first one that fails will cause all
322 * subsequent things to fail. Processing this can be a little tricky as the
323 * actual data size that may be required is variable, not all structures are
 * fixed sizes and some vary based on the instruction set (e.g. x86 vs.
325 * something else).
326 *
327 * The way that we handle process locking deserves some consideration. Prior to
328 * the colonization of prwritectl and the support for dynamic sizing of data,
329 * the logic would try to read in a large chunk of data and keep a process
330 * locked throughout that period and then unlock it before reading more data. As
331 * such, we mimic that logically and basically lock it before executing the
332 * first (or any subsequent) command and then only unlock it either when we're
333 * done entirely or we need to allocate memory or read from the process.
334 *
335 * This function is a common implementation for both the ILP32 and LP64 entry
336 * points as they are mostly the same except for the sizing and control function
337 * we call.
338 */
int
prwritectl_common(vnode_t *vp, uio_t *uiop, cred_t *cr,
    const proc_control_info_t *proc_info, size_t ninfo, size_t cmdsize,
    int (*pr_controlf)(long, void *, prnode_t *, cred_t *))
{
	int ret;
	prwritectl_t prwc;

	/* cmdsize selects the data model: int32_t for ILP32, long for LP64 */
	VERIFY(cmdsize == sizeof (int32_t) || cmdsize == sizeof (long));

	bzero(&prwc, sizeof (prwc));
	prwc.prwc_pnp = VTOP(vp);
	prwc.prwc_uiop = uiop;
	prwc.prwc_need32 = cmdsize == sizeof (int32_t);

	/*
	 * We may have multiple commands to read and want to try to minimize
	 * the amount of reading that we do. Our callers expect us to have a
	 * contiguous buffer for a command's actual implementation. However, we
	 * must have at least a single long worth of data, otherwise it's not
	 * worth continuing.
	 *
	 * Note the loop condition: even when the uio_t is drained we may
	 * still have buffered-but-unprocessed commands (prwc_curvalid > 0).
	 */
	while (uiop->uio_resid > 0 || prwc.prwc_curvalid > 0) {
		const proc_control_info_t *proc_cmd;
		void *data;

		/*
		 * Check if we have enough data to identify a command. If not,
		 * we read as much as we can in one gulp.
		 */
		if ((ret = prwritectl_readin(&prwc, cmdsize)) != 0) {
			goto out;
		}

		/*
		 * Identify the command and figure out how much data we
		 * should have read in the kernel. Some commands have a
		 * variable length and we need to make sure the minimum is met
		 * before asking how much there is in general. Most things
		 * know what the minimum length is and this pcs_minf() is not
		 * implemented. However things that are ISA-specific require
		 * us to ask that first.
		 *
		 * We also must be aware that there may not actually be enough
		 * data present in the uio_t.
		 */
		if ((proc_cmd = prwritectl_cmd_identify(&prwc, proc_info,
		    ninfo, cmdsize)) == NULL) {
			ret = EINVAL;
			goto out;
		}

		size_t needed_data = cmdsize + proc_cmd->pcs_size;
		if (proc_cmd->pcs_minf != NULL) {
			size_t min;

			if (!proc_cmd->pcs_minf(&min)) {
				ret = EINVAL;
				goto out;
			}

			needed_data += min;
		}

		if (proc_cmd->pcs_sizef != NULL) {
			size_t extra;

			/*
			 * Make sure we have the minimum amount of data that
			 * they asked us to between the static and minf
			 * function.
			 */
			if ((ret = prwritectl_readin(&prwc, needed_data)) !=
			    0) {
				goto out;
			}

			VERIFY3U(prwc.prwc_curvalid, >, cmdsize);
			data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
			if (!proc_cmd->pcs_sizef(data, &extra)) {
				ret = EINVAL;
				goto out;
			}

			needed_data += extra;
		}

		/*
		 * Now that we know how much data we're supposed to have,
		 * finally ensure we have the total amount we need.
		 */
		if ((ret = prwritectl_readin(&prwc, needed_data)) != 0) {
			goto out;
		}

		/*
		 * /proc has traditionally assumed control writes come in
		 * multiples of a long. This is 4 bytes for ILP32 and 8 bytes
		 * for LP64. When calculating the required size for a
		 * structure, it would always round that up to the next long.
		 * However, the exact combination of circumstances changes
		 * with the introduction of the 64-bit kernel. For 64-bit
		 * processes we round up when the current command we're
		 * processing isn't the last one.
		 *
		 * Because of our tracking structures and caching we need to
		 * look beyond the uio_t to make this determination. In
		 * particular, the uio_t can have a zero resid, but we may
		 * still have additional data to read as indicated by
		 * prwc_curvalid exceeding the current command size. In the
		 * end, we must check both of these cases.
		 */
		if ((needed_data % cmdsize) != 0) {
			if (cmdsize == sizeof (int32_t) ||
			    prwc.prwc_curvalid > needed_data ||
			    prwc.prwc_uiop->uio_resid > 0) {
				needed_data = P2ROUNDUP(needed_data,
				    cmdsize);
				if ((ret = prwritectl_readin(&prwc,
				    needed_data)) != 0) {
					goto out;
				}
			}
		}

		/*
		 * All data for this command is buffered; (re)take the process
		 * lock before executing it. The lock may have been dropped by
		 * prwritectl_readin() above.
		 */
		if (!prwc.prwc_locked) {
			ret = prlock(prwc.prwc_pnp, ZNO);
			if (ret != 0) {
				goto out;
			}
			prwc.prwc_locked = B_TRUE;
		}

		/*
		 * Run our actual command. When there is an error, then the
		 * underlying pr_control call will have unlocked the prnode_t
		 * on our behalf. pr_control can return -1, which is a special
		 * error indicating a timeout occurred. In such a case the
		 * node is unlocked; however, we are supposed to continue
		 * processing commands regardless.
		 *
		 * Finally, we must deal with one actual wrinkle. The LP64
		 * based logic always guarantees that we have data that is
		 * 8-byte aligned. However, the ILP32 logic is 4-byte aligned
		 * and the rest of the /proc code assumes it can always
		 * dereference it. If we're not aligned, we have to bcopy it
		 * to a temporary buffer.
		 */
		data = (void *)((uintptr_t)prwc.prwc_buf + cmdsize);
#ifdef	DEBUG
		if (cmdsize == sizeof (long)) {
			VERIFY0((uintptr_t)data % alignof (long));
		}
#endif
		if (prwc.prwc_need32 && ((uintptr_t)data % alignof (long)) !=
		    0 && needed_data > cmdsize) {
			bcopy(data, prwc.prwc_buf32, needed_data - cmdsize);
			data = prwc.prwc_buf32;
		}
		ret = pr_controlf(proc_cmd->pcs_cmd, data, prwc.prwc_pnp, cr);
		if (ret != 0) {
			/* the control function unlocked the node for us */
			prwc.prwc_locked = B_FALSE;
			if (ret > 0) {
				goto out;
			}
		}

		/*
		 * Finally, now that we have processed this command, we need
		 * to move on. To make our life simple, we basically shift all
		 * the data in our buffer over to indicate it's been consumed.
		 * While a little wasteful, this simplifies buffer management
		 * and guarantees that command processing uses a semi-sanitized
		 * state. Visually, this is the following transformation:
		 *
		 * 0                 20               prwc.prwc_curvalid
		 * +------------------+----------------+
		 * | needed_data      | remaining_data |
		 * +------------------+----------------+
		 *
		 * In the above example we are shifting all the data over by
		 * 20, so remaining data starts at 0. This leaves us
		 * needed_data bytes to clean up from what was valid.
		 */
		if (prwc.prwc_buf32 != NULL) {
			/* scrub the bounce buffer regardless of whether used */
			bzero(prwc.prwc_buf32, needed_data - cmdsize);
		}

		if (prwc.prwc_curvalid > needed_data) {
			size_t save_size = prwc.prwc_curvalid - needed_data;
			void *first_save = (void *)((uintptr_t)prwc.prwc_buf +
			    needed_data);
			memmove(prwc.prwc_buf, first_save, save_size);
			void *first_zero = (void *)((uintptr_t)prwc.prwc_buf +
			    save_size);
			bzero(first_zero, needed_data);
		} else {
			bzero(prwc.prwc_buf, prwc.prwc_curvalid);
		}
		prwc.prwc_curvalid -= needed_data;
	}

	/*
	 * We've managed to successfully process everything. We can actually
	 * say this was successful now.
	 */
	ret = 0;

out:
	if (prwc.prwc_locked) {
		prunlock(prwc.prwc_pnp);
		prwc.prwc_locked = B_FALSE;
	}

	if (prwc.prwc_buf != NULL) {
		kmem_free(prwc.prwc_buf, prwc.prwc_buflen);
	}

	if (prwc.prwc_buf32 != NULL) {
		VERIFY(prwc.prwc_need32);
		kmem_free(prwc.prwc_buf32, prwc.prwc_buflen);
	}

	return (ret);
}
564
/*
 * Execute a single native (LP64) control operation on behalf of
 * prwritectl(). We are entered with the target prnode locked (prlock()).
 * Contract with the caller (see prwritectl_common()): on a non-zero return
 * the node has been unlocked -- either here or, presumably, by the failing
 * helper (e.g. pr_wait_stop(), pr_watch(), pr_agent() via 'unlocked').
 * A return of -1 is special: the node is unlocked but the caller should keep
 * processing subsequent commands.
 */
static int
pr_control(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;
	int error = 0;
	arg_t *argp = generic;

	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
	{
		time_t timeo;

		/*
		 * Can't apply to a system process.
		 */
		if (p->p_as == &kas) {
			error = EBUSY;
			break;
		}

		if (cmd == PCSTOP || cmd == PCDSTOP)
			pr_stop(pnp);

		if (cmd == PCDSTOP)
			break;

		/*
		 * If an lwp is waiting for itself or its process,
		 * don't wait. The stopped lwp would never see the
		 * fact that it is stopped.
		 */
		if ((pcp->prc_flags & PRC_LWP)?
		    (pcp->prc_thread == curthread) : (p == curproc)) {
			if (cmd == PCWSTOP || cmd == PCTWSTOP)
				error = EBUSY;
			break;
		}

		timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
		/* NOTE(review): on failure the node is returned unlocked */
		if ((error = pr_wait_stop(pnp, timeo)) != 0)
			return (error);

		break;
	}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p, &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		error = pr_setsig(pnp, &argp->siginfo);
		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
			/* wait for the victim to die before returning */
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, argp->flags);
		break;

	case PCSREG:	/* set general registers */
	{
		kthread_t *t = pr_thread(pnp);

		if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
			/* registers may only be set on a stopped lwp */
			thread_unlock(t);
			error = EBUSY;
		} else {
			thread_unlock(t);
			mutex_exit(&p->p_lock);
			prsetprregs(ttolwp(t), argp->prgregset, 0);
			mutex_enter(&p->p_lock);
		}
		break;
	}

	case PCSFPREG:	/* set floating-point registers */
		error = pr_setfpregs(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
		error = pr_setxregs(pnp, (prxregset_t *)argp);
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		error = pr_setvaddr(pnp, argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		error = pr_watch(pnp, &argp->prwatch, &unlocked);
		if (error && unlocked)
			/* the helper dropped the lock on our behalf */
			return (error);
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		error = pr_agent(pnp, argp->prgregset, &unlocked);
		if (error && unlocked)
			return (error);
		break;

	case PCREAD:	/* read from the address space */
		error = pr_rdwr(p, UIO_READ, &argp->priovec);
		break;

	case PCWRITE:	/* write to the address space */
		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
		break;

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;
	case PCSZONE:	/* set the process's zoneid credentials */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* honor the contract: failed commands leave the node unlocked */
	if (error)
		prunlock(pnp);
	return (error);
}
761
762 int
763 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
764 {
765 return (prwritectl_common(vp, uiop, cr, proc_ctl_info,
766 ARRAY_SIZE(proc_ctl_info), sizeof (long), pr_control));
767 }
768
769 #ifdef _SYSCALL32_IMPL
770
/*
 * ILP32 analogue of arg_t: fixed-size command payloads as laid out by a
 * 32-bit writer. The sigset_t, fltset_t, sysset_t and prpriv_t types are
 * used directly for both data models here.
 */
typedef union {
	int32_t		sig;		/* PCKILL, PCUNKILL */
	int32_t		nice;		/* PCNICE */
	int32_t		timeo;		/* PCTWSTOP */
	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
	caddr32_t	vaddr;		/* PCSVADDR */
	siginfo32_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset32_t	prfpregset;	/* PCSFPREG */
	prwatch32_t	prwatch;	/* PCWATCH */
	priovec32_t	priovec;	/* PCREAD, PCWRITE */
	prcred32_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	int32_t		przoneid;	/* PCSZONE */
} arg32_t;
789
790 static int pr_setfpregs32(prnode_t *, prfpregset32_t *);
791
792 static boolean_t
793 prwritectl_pcscredx32_sizef(const void *datap, size_t *sizep)
794 {
795 const prcred32_t *cred = datap;
796
797 if (cred->pr_ngroups < 0 || cred->pr_ngroups > ngroups_max) {
798 return (B_FALSE);
799 }
800
801 if (cred->pr_ngroups == 0) {
802 *sizep = 0;
803 } else {
804 *sizep = (cred->pr_ngroups - 1) * sizeof (gid32_t);
805 }
806 return (B_TRUE);
807 }
808
809 /*
810 * When dealing with ILP32 code, we are not at a point where we can assume
811 * 64-bit aligned data. Any functions that are operating here must be aware of
812 * that.
813 */
814 static const proc_control_info_t proc_ctl_info32[] = {
815 { PCNULL, 0, NULL, NULL },
816 { PCSTOP, 0, NULL, NULL },
817 { PCDSTOP, 0, NULL, NULL },
818 { PCWSTOP, 0, NULL, NULL },
819 { PCCSIG, 0, NULL, NULL },
820 { PCCFAULT, 0, NULL, NULL },
821 { PCSSIG, sizeof (siginfo32_t), NULL, NULL },
822 { PCTWSTOP, sizeof (int32_t), NULL, NULL },
823 { PCKILL, sizeof (int32_t), NULL, NULL },
824 { PCUNKILL, sizeof (int32_t), NULL, NULL },
825 { PCNICE, sizeof (int32_t), NULL, NULL },
826 { PCRUN, sizeof (uint32_t), NULL, NULL },
827 { PCSET, sizeof (uint32_t), NULL, NULL },
828 { PCUNSET, sizeof (uint32_t), NULL, NULL },
829 { PCSVADDR, sizeof (caddr32_t), NULL, NULL },
830 { PCSTRACE, sizeof (sigset_t), NULL, NULL },
831 { PCSHOLD, sizeof (sigset_t), NULL, NULL },
832 { PCSFAULT, sizeof (fltset_t), NULL, NULL },
833 { PCSENTRY, sizeof (sysset_t), NULL, NULL },
834 { PCSEXIT, sizeof (sysset_t), NULL, NULL },
835 { PCSREG, sizeof (prgregset32_t), NULL, NULL },
836 { PCAGENT, sizeof (prgregset32_t), NULL, NULL },
837 { PCSFPREG, sizeof (prfpregset32_t), NULL, NULL },
838 { PCSXREG, 0, prwriteminxreg, prwritesizexreg },
839 { PCWATCH, sizeof (prwatch32_t), NULL },
840 { PCREAD, sizeof (priovec32_t), NULL, NULL },
841 { PCWRITE, sizeof (priovec32_t), NULL, NULL },
842 { PCSCRED, sizeof (prcred32_t), NULL, NULL },
843 { PCSCREDX, sizeof (prcred32_t), NULL, prwritectl_pcscredx32_sizef },
844 { PCSPRIV, sizeof (prpriv_t), NULL, prwritectl_pcspriv_sizef },
845 { PCSZONE, sizeof (long), NULL },
846 };
847
/*
 * ILP32 variant of pr_control(): interpret a payload laid out by a 32-bit
 * writer (arg32_t), converting values to native forms as needed, and fail
 * with EOVERFLOW where an operation cannot be applied to a 64-bit victim
 * (PROCESS_NOT_32BIT). Locking contract is identical to pr_control(): on a
 * non-zero return the prnode has been unlocked; -1 means "unlocked, but keep
 * processing commands".
 */
static int
pr_control32(long cmd, void *generic, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;
	int error = 0;
	arg32_t *argp = generic;

	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
	{
		time_t timeo;

		/*
		 * Can't apply to a system process.
		 */
		if (p->p_as == &kas) {
			error = EBUSY;
			break;
		}

		if (cmd == PCSTOP || cmd == PCDSTOP)
			pr_stop(pnp);

		if (cmd == PCDSTOP)
			break;

		/*
		 * If an lwp is waiting for itself or its process,
		 * don't wait. The lwp will never see the fact that
		 * itself is stopped.
		 */
		if ((pcp->prc_flags & PRC_LWP)?
		    (pcp->prc_thread == curthread) : (p == curproc)) {
			if (cmd == PCWSTOP || cmd == PCTWSTOP)
				error = EBUSY;
			break;
		}

		timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
		/* NOTE(review): on failure the node is returned unlocked */
		if ((error = pr_wait_stop(pnp, timeo)) != 0)
			return (error);

		break;
	}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, (ulong_t)argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p, &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			int sig = (int)argp->siginfo.si_signo;
			siginfo_t siginfo;

			/* convert the 32-bit siginfo to the native form */
			bzero(&siginfo, sizeof (siginfo));
			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
			error = pr_setsig(pnp, &siginfo);
			if (sig == SIGKILL && error == 0) {
				/* wait for the victim to die */
				prunlock(pnp);
				pr_wait_die(pnp);
				return (-1);
			}
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, (long)argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, (long)argp->flags);
		break;

	case PCSREG:	/* set general registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			kthread_t *t = pr_thread(pnp);

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				/* registers may only be set on a stopped lwp */
				thread_unlock(t);
				error = EBUSY;
			} else {
				prgregset_t prgregset;
				klwp_t *lwp = ttolwp(t);

				thread_unlock(t);
				mutex_exit(&p->p_lock);
				prgregset_32ton(lwp, argp->prgregset,
				    prgregset);
				prsetprregs(lwp, prgregset, 0);
				mutex_enter(&p->p_lock);
			}
		}
		break;

	case PCSFPREG:	/* set floating-point registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setfpregs32(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setxregs(pnp, (prxregset_t *)argp);
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setvaddr(pnp,
			    (caddr_t)(uintptr_t)argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prwatch_t prwatch;

			/* widen the 32-bit watch description */
			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
			prwatch.pr_size = argp->prwatch.pr_size;
			prwatch.pr_wflags = argp->prwatch.pr_wflags;
			prwatch.pr_pad = argp->prwatch.pr_pad;
			error = pr_watch(pnp, &prwatch, &unlocked);
			if (error && unlocked)
				/* helper dropped the lock on our behalf */
				return (error);
		}
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prgregset_t prgregset;
			kthread_t *t = pr_thread(pnp);
			klwp_t *lwp = ttolwp(t);
			thread_unlock(t);
			mutex_exit(&p->p_lock);
			prgregset_32ton(lwp, argp->prgregset, prgregset);
			mutex_enter(&p->p_lock);
			error = pr_agent(pnp, prgregset, &unlocked);
			if (error && unlocked)
				return (error);
		}
		break;

	case PCREAD:	/* read from the address space */
	case PCWRITE:	/* write to the address space */
		if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
			error = EOVERFLOW;
		else {
			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
			priovec_t priovec;

			/* widen the 32-bit I/O vector */
			priovec.pio_base =
			    (void *)(uintptr_t)argp->priovec.pio_base;
			priovec.pio_len = (size_t)argp->priovec.pio_len;
			priovec.pio_offset = (off_t)
			    (uint32_t)argp->priovec.pio_offset;
			error = pr_rdwr(p, rw, &priovec);
		}
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
	{
		/*
		 * All the fields in these structures are exactly the
		 * same and so the structures are compatible. In case
		 * this ever changes, we catch this with the ASSERT
		 * below.
		 */
		prcred_t *prcred = (prcred_t *)&argp->prcred;

#ifndef __lint
		ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
#endif

		error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
		break;
	}

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;

	case PCSZONE:	/* set the process's zoneid */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* honor the contract: failed commands leave the node unlocked */
	if (error)
		prunlock(pnp);
	return (error);
}
1114
/*
 * Write handler for /proc control files written in the ILP32 message
 * format: dispatch to the common control-parsing loop with the 32-bit
 * message table, 32-bit word size and the 32-bit control executor.
 */
int
prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
{
	return (prwritectl_common(vp, uiop, cr, proc_ctl_info32,
	    ARRAY_SIZE(proc_ctl_info32), sizeof (int32_t), pr_control32));
}
1121 #endif /* _SYSCALL32_IMPL */
1122
1123 /*
1124 * Return the specific or chosen thread/lwp for a control operation.
1125 * Returns with the thread locked via thread_lock(t).
1126 */
1127 kthread_t *
1128 pr_thread(prnode_t *pnp)
1129 {
1130 prcommon_t *pcp = pnp->pr_common;
1131 kthread_t *t;
1132
1133 if (pcp->prc_flags & PRC_LWP) {
1134 t = pcp->prc_thread;
1135 ASSERT(t != NULL);
1136 thread_lock(t);
1137 } else {
1138 proc_t *p = pcp->prc_proc;
1139 t = prchoose(p); /* returns locked thread */
1140 ASSERT(t != NULL);
1141 }
1142
1143 return (t);
1144 }
1145
1146 /*
1147 * Direct the process or lwp to stop.
1148 */
void
pr_stop(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	vnode_t *vp;

	/*
	 * If already stopped, do nothing; otherwise flag
	 * it to be stopped the next time it tries to run.
	 * If sleeping at interruptible priority, set it
	 * running so it will stop within cv_wait_sig().
	 *
	 * Take care to cooperate with jobcontrol: if an lwp
	 * is stopped due to the default action of a jobcontrol
	 * stop signal, flag it to be stopped the next time it
	 * starts due to a SIGCONT signal.
	 */
	if (pcp->prc_flags & PRC_LWP)
		t = pcp->prc_thread;
	else
		t = p->p_tlist;
	ASSERT(t != NULL);

	/*
	 * For an lwp file descriptor only the one lwp is flagged;
	 * for a process file descriptor we walk the whole lwp list
	 * (the break at the bottom distinguishes the two cases).
	 */
	do {
		int notify;

		notify = 0;
		thread_lock(t);
		if (!ISTOPPED(t)) {
			t->t_proc_flag |= TP_PRSTOP;
			t->t_sig_check = 1;	/* do ISSIG */
		}

		/* Move the thread from wait queue to run queue */
		if (ISWAITING(t))
			setrun_locked(t);

		if (ISWAKEABLE(t)) {
			if (t->t_wchan0 == NULL)
				setrun_locked(t);
			else if (!VSTOPPED(t)) {
				/*
				 * Mark it virtually stopped.
				 */
				t->t_proc_flag |= TP_PRVSTOP;
				notify = 1;
			}
		}
		/*
		 * force the thread into the kernel
		 * if it is not already there.
		 */
		prpokethread(t);
		thread_unlock(t);
		/*
		 * A new virtual stop is an event of interest; wake up
		 * any waiters on this lwp's trace file, if it has one.
		 */
		if (notify &&
		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
			prnotify(vp);
		if (pcp->prc_flags & PRC_LWP)
			break;
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * We do this just in case the thread we asked
	 * to stop is in holdlwps() (called from cfork()).
	 */
	cv_broadcast(&p->p_holdlwps);
}
1218
1219 /*
1220 * Sleep until the lwp stops, but cooperate with
1221 * jobcontrol: Don't wake up if the lwp is stopped
1222 * due to the default action of a jobcontrol stop signal.
1223 * If this is the process file descriptor, sleep
1224 * until all of the process's lwps stop.
1225 */
1226 int
1227 pr_wait_stop(prnode_t *pnp, time_t timeo)
1228 {
1229 prcommon_t *pcp = pnp->pr_common;
1230 proc_t *p = pcp->prc_proc;
1231 timestruc_t rqtime;
1232 timestruc_t *rqtp = NULL;
1233 int timecheck = 0;
1234 kthread_t *t;
1235 int error;
1236
1237 if (timeo > 0) { /* millisecond timeout */
1238 /*
1239 * Determine the precise future time of the requested timeout.
1240 */
1241 timestruc_t now;
1242
1243 timecheck = timechanged;
1244 gethrestime(&now);
1245 rqtp = &rqtime;
1246 rqtp->tv_sec = timeo / MILLISEC;
1247 rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1248 timespecadd(rqtp, &now);
1249 }
1250
1251 if (pcp->prc_flags & PRC_LWP) { /* lwp file descriptor */
1252 t = pcp->prc_thread;
1253 ASSERT(t != NULL);
1254 thread_lock(t);
1255 while (!ISTOPPED(t) && !VSTOPPED(t)) {
1256 thread_unlock(t);
1257 mutex_enter(&pcp->prc_mutex);
1258 prunlock(pnp);
1259 error = pr_wait(pcp, rqtp, timecheck);
1260 if (error) /* -1 is timeout */
1261 return (error);
1262 if ((error = prlock(pnp, ZNO)) != 0)
1263 return (error);
1264 ASSERT(p == pcp->prc_proc);
1265 ASSERT(t == pcp->prc_thread);
1266 thread_lock(t);
1267 }
1268 thread_unlock(t);
1269 } else { /* process file descriptor */
1270 t = prchoose(p); /* returns locked thread */
1271 ASSERT(t != NULL);
1272 ASSERT(MUTEX_HELD(&p->p_lock));
1273 while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1274 (p->p_flag & SEXITLWPS)) {
1275 thread_unlock(t);
1276 mutex_enter(&pcp->prc_mutex);
1277 prunlock(pnp);
1278 error = pr_wait(pcp, rqtp, timecheck);
1279 if (error) /* -1 is timeout */
1280 return (error);
1281 if ((error = prlock(pnp, ZNO)) != 0)
1282 return (error);
1283 ASSERT(p == pcp->prc_proc);
1284 t = prchoose(p); /* returns locked t */
1285 ASSERT(t != NULL);
1286 }
1287 thread_unlock(t);
1288 }
1289
1290 ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1291 t != NULL && t->t_state != TS_ZOMB);
1292
1293 return (0);
1294 }
1295
1296 int
1297 pr_setrun(prnode_t *pnp, ulong_t flags)
1298 {
1299 prcommon_t *pcp = pnp->pr_common;
1300 proc_t *p = pcp->prc_proc;
1301 kthread_t *t;
1302 klwp_t *lwp;
1303
1304 /*
1305 * Cannot set an lwp running if it is not stopped.
1306 * Also, no lwp other than the /proc agent lwp can
1307 * be set running so long as the /proc agent lwp exists.
1308 */
1309 t = pr_thread(pnp); /* returns locked thread */
1310 if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1311 !(t->t_proc_flag & TP_PRSTOP)) ||
1312 (p->p_agenttp != NULL &&
1313 (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1314 thread_unlock(t);
1315 return (EBUSY);
1316 }
1317 thread_unlock(t);
1318 if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1319 return (EINVAL);
1320 lwp = ttolwp(t);
1321 if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1322 /*
1323 * Discard current siginfo_t, if any.
1324 */
1325 lwp->lwp_cursig = 0;
1326 lwp->lwp_extsig = 0;
1327 if (lwp->lwp_curinfo) {
1328 siginfofree(lwp->lwp_curinfo);
1329 lwp->lwp_curinfo = NULL;
1330 }
1331 }
1332 if (flags & PRCFAULT)
1333 lwp->lwp_curflt = 0;
1334 /*
1335 * We can't hold p->p_lock when we touch the lwp's registers.
1336 * It may be swapped out and we will get a page fault.
1337 */
1338 if (flags & PRSTEP) {
1339 mutex_exit(&p->p_lock);
1340 prstep(lwp, 0);
1341 mutex_enter(&p->p_lock);
1342 }
1343 if (flags & PRSTOP) {
1344 t->t_proc_flag |= TP_PRSTOP;
1345 t->t_sig_check = 1; /* do ISSIG */
1346 }
1347 if (flags & PRSABORT)
1348 lwp->lwp_sysabort = 1;
1349 thread_lock(t);
1350 if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1351 /*
1352 * Here, we are dealing with a single lwp.
1353 */
1354 if (ISTOPPED(t)) {
1355 t->t_schedflag |= TS_PSTART;
1356 t->t_dtrace_stop = 0;
1357 setrun_locked(t);
1358 } else if (flags & PRSABORT) {
1359 t->t_proc_flag &=
1360 ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1361 setrun_locked(t);
1362 } else if (!(flags & PRSTOP)) {
1363 t->t_proc_flag &=
1364 ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1365 }
1366 thread_unlock(t);
1367 } else {
1368 /*
1369 * Here, we are dealing with the whole process.
1370 */
1371 if (ISTOPPED(t)) {
1372 /*
1373 * The representative lwp is stopped on an event
1374 * of interest. We demote it to PR_REQUESTED and
1375 * choose another representative lwp. If the new
1376 * representative lwp is not stopped on an event of
1377 * interest (other than PR_REQUESTED), we set the
1378 * whole process running, else we leave the process
1379 * stopped showing the next event of interest.
1380 */
1381 kthread_t *tx = NULL;
1382
1383 if (!(flags & PRSABORT) &&
1384 t->t_whystop == PR_SYSENTRY &&
1385 t->t_whatstop == SYS_lwp_exit)
1386 tx = t; /* remember the exiting lwp */
1387 t->t_whystop = PR_REQUESTED;
1388 t->t_whatstop = 0;
1389 thread_unlock(t);
1390 t = prchoose(p); /* returns locked t */
1391 ASSERT(ISTOPPED(t) || VSTOPPED(t));
1392 if (VSTOPPED(t) ||
1393 t->t_whystop == PR_REQUESTED) {
1394 thread_unlock(t);
1395 allsetrun(p);
1396 } else {
1397 thread_unlock(t);
1398 /*
1399 * As a special case, if the old representative
1400 * lwp was stopped on entry to _lwp_exit()
1401 * (and we are not aborting the system call),
1402 * we set the old representative lwp running.
1403 * We do this so that the next process stop
1404 * will find the exiting lwp gone.
1405 */
1406 if (tx != NULL) {
1407 thread_lock(tx);
1408 tx->t_schedflag |= TS_PSTART;
1409 t->t_dtrace_stop = 0;
1410 setrun_locked(tx);
1411 thread_unlock(tx);
1412 }
1413 }
1414 } else {
1415 /*
1416 * No event of interest; set all of the lwps running.
1417 */
1418 if (flags & PRSABORT) {
1419 t->t_proc_flag &=
1420 ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1421 setrun_locked(t);
1422 }
1423 thread_unlock(t);
1424 allsetrun(p);
1425 }
1426 }
1427 return (0);
1428 }
1429
1430 /*
1431 * Wait until process/lwp stops or until timer expires.
1432 * Return EINTR for an interruption, -1 for timeout, else 0.
1433 */
1434 int
1435 pr_wait(prcommon_t *pcp, /* prcommon referring to process/lwp */
1436 timestruc_t *ts, /* absolute time of timeout, if any */
1437 int timecheck)
1438 {
1439 int rval;
1440
1441 ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1442 rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1443 mutex_exit(&pcp->prc_mutex);
1444 switch (rval) {
1445 case 0:
1446 return (EINTR);
1447 case -1:
1448 return (-1);
1449 default:
1450 return (0);
1451 }
1452 }
1453
1454 /*
1455 * Make all threads in the process runnable.
1456 */
1457 void
1458 allsetrun(proc_t *p)
1459 {
1460 kthread_t *t;
1461
1462 ASSERT(MUTEX_HELD(&p->p_lock));
1463
1464 if ((t = p->p_tlist) != NULL) {
1465 do {
1466 thread_lock(t);
1467 ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1468 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1469 if (ISTOPPED(t)) {
1470 t->t_schedflag |= TS_PSTART;
1471 t->t_dtrace_stop = 0;
1472 setrun_locked(t);
1473 }
1474 thread_unlock(t);
1475 } while ((t = t->t_forw) != p->p_tlist);
1476 }
1477 }
1478
1479 /*
1480 * Wait for the process to die.
1481 * We do this after sending SIGKILL because we know it will
1482 * die soon and we want subsequent operations to return ENOENT.
1483 */
1484 void
1485 pr_wait_die(prnode_t *pnp)
1486 {
1487 proc_t *p;
1488
1489 mutex_enter(&pidlock);
1490 while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1491 if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1492 break;
1493 }
1494 mutex_exit(&pidlock);
1495 }
1496
1497 static void
1498 pr_settrace(proc_t *p, sigset_t *sp)
1499 {
1500 prdelset(sp, SIGKILL);
1501 prassignset(&p->p_sigmask, sp);
1502 if (!sigisempty(&p->p_sigmask))
1503 p->p_proc_flag |= P_PR_TRACE;
1504 else if (prisempty(&p->p_fltmask)) {
1505 user_t *up = PTOU(p);
1506 if (up->u_systrap == 0)
1507 p->p_proc_flag &= ~P_PR_TRACE;
1508 }
1509 }
1510
/*
 * PCSSIG operation: make the signal described by sip the current
 * signal for the chosen lwp, as if it had just been posted and
 * accepted.  A signal number of zero clears the current signal and
 * its siginfo.  The side-effects of SIGKILL, SIGCONT and the
 * default-stop signals are applied just as for a genuinely posted
 * signal.  Returns 0, EINVAL for a bad signal number, or EBUSY if
 * the current signal is SIGKILL (which may not be displaced).
 * Called with p->p_lock held (dropped briefly for allocation).
 */
int
pr_setsig(prnode_t *pnp, siginfo_t *sip)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
	int sig = sip->si_signo;
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	klwp_t *lwp;
	int error = 0;

	t = pr_thread(pnp);	/* returns locked thread */
	thread_unlock(t);
	lwp = ttolwp(t);
	if (sig < 0 || sig >= nsig)
		/* Zero allowed here */
		error = EINVAL;
	else if (lwp->lwp_cursig == SIGKILL)
		/* "can't happen", but just in case */
		error = EBUSY;
	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
		lwp->lwp_extsig = 0;
		/*
		 * Discard current siginfo_t, if any.
		 */
		if (lwp->lwp_curinfo) {
			siginfofree(lwp->lwp_curinfo);
			lwp->lwp_curinfo = NULL;
		}
	} else {
		kthread_t *tx;
		sigqueue_t *sqp;

		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
		mutex_exit(&p->p_lock);
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
		mutex_enter(&p->p_lock);

		/* reuse an existing curinfo if the lwp already has one */
		if (lwp->lwp_curinfo == NULL)
			lwp->lwp_curinfo = sqp;
		else
			kmem_free(sqp, sizeof (sigqueue_t));
		/*
		 * Copy contents of info to current siginfo_t.
		 */
		bcopy(sip, &lwp->lwp_curinfo->sq_info,
		    sizeof (lwp->lwp_curinfo->sq_info));
		/*
		 * Prevent contents published by si_zoneid-unaware /proc
		 * consumers from being incorrectly filtered.  Because
		 * an uninitialized si_zoneid is the same as
		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
		 * process in a non-global zone with a siginfo which
		 * appears to come from the global zone.
		 */
		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
			lwp->lwp_curinfo->sq_info.si_zoneid =
			    p->p_zone->zone_id;
		/*
		 * Side-effects for SIGKILL and jobcontrol signals.
		 */
		if (sig == SIGKILL) {
			p->p_flag |= SKILLED;
			p->p_flag &= ~SEXTKILLED;
		} else if (sig == SIGCONT) {
			/* SIGCONT discards all pending stop signals */
			p->p_flag |= SSCONT;
			sigdelq(p, NULL, SIGSTOP);
			sigdelq(p, NULL, SIGTSTP);
			sigdelq(p, NULL, SIGTTOU);
			sigdelq(p, NULL, SIGTTIN);
			sigdiffset(&p->p_sig, &stopdefault);
			sigdiffset(&p->p_extsig, &stopdefault);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGSTOP);
					sigdelq(p, tx, SIGTSTP);
					sigdelq(p, tx, SIGTTOU);
					sigdelq(p, tx, SIGTTIN);
					sigdiffset(&tx->t_sig, &stopdefault);
					sigdiffset(&tx->t_extsig, &stopdefault);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		} else if (sigismember(&stopdefault, sig)) {
			/* a stop signal discards any pending SIGCONT */
			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
				p->p_flag &= ~SSCONT;
			sigdelq(p, NULL, SIGCONT);
			sigdelset(&p->p_sig, SIGCONT);
			sigdelset(&p->p_extsig, SIGCONT);
			if ((tx = p->p_tlist) != NULL) {
				do {
					sigdelq(p, tx, SIGCONT);
					sigdelset(&tx->t_sig, SIGCONT);
					sigdelset(&tx->t_extsig, SIGCONT);
				} while ((tx = tx->t_forw) != p->p_tlist);
			}
		}
		/*
		 * Make the lwp take notice of the new current signal:
		 * wake it from an interruptible sleep/wait, or, for
		 * SIGKILL, set even a stopped lwp running.
		 */
		thread_lock(t);
		if (ISWAKEABLE(t) || ISWAITING(t)) {
			/* Set signaled sleeping/waiting lwp running */
			setrun_locked(t);
		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
			/* If SIGKILL, set stopped lwp running */
			p->p_stopsig = 0;
			t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
			t->t_dtrace_stop = 0;
			setrun_locked(t);
		}
		t->t_sig_check = 1;	/* so ISSIG will be done */
		thread_unlock(t);
		/*
		 * More jobcontrol side-effects.
		 */
		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
			p->p_stopsig = 0;
			do {
				thread_lock(tx);
				if (tx->t_state == TS_STOPPED &&
				    tx->t_whystop == PR_JOBCONTROL) {
					tx->t_schedflag |= TS_XSTART;
					setrun_locked(tx);
				}
				thread_unlock(tx);
			} while ((tx = tx->t_forw) != p->p_tlist);
		}
	}
	return (error);
}
1639
1640 int
1641 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1642 {
1643 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1644 prcommon_t *pcp = pnp->pr_common;
1645 proc_t *p = pcp->prc_proc;
1646 k_siginfo_t info;
1647
1648 if (sig <= 0 || sig >= nsig)
1649 return (EINVAL);
1650
1651 bzero(&info, sizeof (info));
1652 info.si_signo = sig;
1653 info.si_code = SI_USER;
1654 info.si_pid = curproc->p_pid;
1655 info.si_ctid = PRCTID(curproc);
1656 info.si_zoneid = getzoneid();
1657 info.si_uid = crgetruid(cr);
1658 sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1659 pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1660
1661 return (0);
1662 }
1663
1664 int
1665 pr_unkill(prnode_t *pnp, int sig)
1666 {
1667 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1668 prcommon_t *pcp = pnp->pr_common;
1669 proc_t *p = pcp->prc_proc;
1670 sigqueue_t *infop = NULL;
1671
1672 if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1673 return (EINVAL);
1674
1675 if (pcp->prc_flags & PRC_LWP)
1676 sigdeq(p, pcp->prc_thread, sig, &infop);
1677 else
1678 sigdeq(p, NULL, sig, &infop);
1679
1680 if (infop)
1681 siginfofree(infop);
1682
1683 return (0);
1684 }
1685
1686 int
1687 pr_nice(proc_t *p, int nice, cred_t *cr)
1688 {
1689 kthread_t *t;
1690 int err;
1691 int error = 0;
1692
1693 t = p->p_tlist;
1694 do {
1695 ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1696 err = CL_DONICE(t, cr, nice, (int *)NULL);
1697 schedctl_set_cidpri(t);
1698 if (error == 0)
1699 error = err;
1700 } while ((t = t->t_forw) != p->p_tlist);
1701
1702 return (error);
1703 }
1704
1705 void
1706 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1707 {
1708 user_t *up = PTOU(p);
1709
1710 if (entry) {
1711 prassignset(&up->u_entrymask, sysset);
1712 } else {
1713 prassignset(&up->u_exitmask, sysset);
1714 }
1715 if (!prisempty(&up->u_entrymask) ||
1716 !prisempty(&up->u_exitmask)) {
1717 up->u_systrap = 1;
1718 p->p_proc_flag |= P_PR_TRACE;
1719 set_proc_sys(p); /* set pre and post-sys flags */
1720 } else {
1721 up->u_systrap = 0;
1722 if (sigisempty(&p->p_sigmask) &&
1723 prisempty(&p->p_fltmask))
1724 p->p_proc_flag &= ~P_PR_TRACE;
1725 }
1726 }
1727
/*
 * The complete set of mode flags that may legally be set via PCSET
 * or cleared via PCUNSET.
 */
#define	ALLFLAGS	\
	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1730
1731 int
1732 pr_set(proc_t *p, long flags)
1733 {
1734 if ((p->p_flag & SSYS) || p->p_as == &kas)
1735 return (EBUSY);
1736
1737 if (flags & ~ALLFLAGS)
1738 return (EINVAL);
1739
1740 if (flags & PR_FORK)
1741 p->p_proc_flag |= P_PR_FORK;
1742 if (flags & PR_RLC)
1743 p->p_proc_flag |= P_PR_RUNLCL;
1744 if (flags & PR_KLC)
1745 p->p_proc_flag |= P_PR_KILLCL;
1746 if (flags & PR_ASYNC)
1747 p->p_proc_flag |= P_PR_ASYNC;
1748 if (flags & PR_BPTADJ)
1749 p->p_proc_flag |= P_PR_BPTADJ;
1750 if (flags & PR_MSACCT)
1751 if ((p->p_flag & SMSACCT) == 0)
1752 estimate_msacct(p->p_tlist, gethrtime());
1753 if (flags & PR_MSFORK)
1754 p->p_flag |= SMSFORK;
1755 if (flags & PR_PTRACE) {
1756 p->p_proc_flag |= P_PR_PTRACE;
1757 /* ptraced process must die if parent dead */
1758 if (p->p_ppid == 1)
1759 sigtoproc(p, NULL, SIGKILL);
1760 }
1761
1762 return (0);
1763 }
1764
1765 int
1766 pr_unset(proc_t *p, long flags)
1767 {
1768 if ((p->p_flag & SSYS) || p->p_as == &kas)
1769 return (EBUSY);
1770
1771 if (flags & ~ALLFLAGS)
1772 return (EINVAL);
1773
1774 if (flags & PR_FORK)
1775 p->p_proc_flag &= ~P_PR_FORK;
1776 if (flags & PR_RLC)
1777 p->p_proc_flag &= ~P_PR_RUNLCL;
1778 if (flags & PR_KLC)
1779 p->p_proc_flag &= ~P_PR_KILLCL;
1780 if (flags & PR_ASYNC)
1781 p->p_proc_flag &= ~P_PR_ASYNC;
1782 if (flags & PR_BPTADJ)
1783 p->p_proc_flag &= ~P_PR_BPTADJ;
1784 if (flags & PR_MSACCT)
1785 disable_msacct(p);
1786 if (flags & PR_MSFORK)
1787 p->p_flag &= ~SMSFORK;
1788 if (flags & PR_PTRACE)
1789 p->p_proc_flag &= ~P_PR_PTRACE;
1790
1791 return (0);
1792 }
1793
/*
 * PCSFPREG operation: install new floating-point register values for
 * the chosen lwp.  The lwp must be stopped or directed to stop;
 * returns EBUSY otherwise, or EINVAL if the machine has no FP unit.
 * Called with p->p_lock held; dropped while the registers are set.
 */
static int
pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
		thread_unlock(t);
		return (EBUSY);
	}
	if (!prhasfp()) {
		thread_unlock(t);
		return (EINVAL);	/* No FP support */
	}

	/* drop p_lock while touching the lwp's stack */
	thread_unlock(t);
	mutex_exit(&p->p_lock);
	prsetprfpregs(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
1817
1818 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 variant of pr_setfpregs(): install new floating-point
 * register values, supplied in the 32-bit prfpregset format, for the
 * chosen lwp.  Same stopped-lwp and FP-support requirements.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
		thread_unlock(t);
		return (EBUSY);
	}
	if (!prhasfp()) {
		thread_unlock(t);
		return (EINVAL);	/* No FP support */
	}

	/* drop p_lock while touching the lwp's stack */
	thread_unlock(t);
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
1842 #endif /* _SYSCALL32_IMPL */
1843
/* ARGSUSED */
/*
 * PCSXREG operation: install new extra-register state for the chosen
 * lwp.  The lwp must be stopped or directed to stop; returns EBUSY
 * otherwise, EINVAL if the machine has no extra-register support,
 * or the error from prsetprxregs().
 */
static int
pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
{
	int error;
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
		thread_unlock(t);
		return (EBUSY);
	}
	thread_unlock(t);

	if (!prhasx(p))
		return (EINVAL);	/* No extra register support */

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	error = prsetprxregs(ttolwp(t), prxregset);
	mutex_enter(&p->p_lock);

	return (error);
}
1868
/*
 * PCSVADDR operation: set the virtual address (program counter) at
 * which the chosen lwp will resume execution.  The lwp must be
 * stopped or directed to stop; returns EBUSY otherwise, else 0.
 */
static int
pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
		thread_unlock(t);
		return (EBUSY);
	}

	/* drop p_lock while touching the lwp's stack */
	thread_unlock(t);
	mutex_exit(&p->p_lock);
	prsvaddr(ttolwp(t), vaddr);
	mutex_enter(&p->p_lock);

	return (0);
}
1888
/*
 * PCSHOLD operation: install a new held-signal (blocked) mask for
 * the chosen lwp.  If the new mask leaves a pending signal
 * deliverable, an interruptibly-sleeping lwp is set running so it
 * will notice.  The thread lock acquired by pr_thread() is held
 * across the update and released here.
 */
void
pr_sethold(prnode_t *pnp, sigset_t *sp)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */

	schedctl_finish_sigblock(t);
	sigutok(sp, &t->t_hold);
	/* wake the lwp if the new mask exposes a pending signal */
	if (ISWAKEABLE(t) &&
	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
		setrun_locked(t);
	t->t_sig_check = 1;	/* so thread will see new holdmask */
	thread_unlock(t);
}
1903
1904 void
1905 pr_setfault(proc_t *p, fltset_t *fltp)
1906 {
1907 prassignset(&p->p_fltmask, fltp);
1908 if (!prisempty(&p->p_fltmask))
1909 p->p_proc_flag |= P_PR_TRACE;
1910 else if (sigisempty(&p->p_sigmask)) {
1911 user_t *up = PTOU(p);
1912 if (up->u_systrap == 0)
1913 p->p_proc_flag &= ~P_PR_TRACE;
1914 }
1915 }
1916
1917 static int
1918 pr_clearsig(prnode_t *pnp)
1919 {
1920 kthread_t *t = pr_thread(pnp); /* returns locked thread */
1921 klwp_t *lwp = ttolwp(t);
1922
1923 thread_unlock(t);
1924 if (lwp->lwp_cursig == SIGKILL)
1925 return (EBUSY);
1926
1927 /*
1928 * Discard current siginfo_t, if any.
1929 */
1930 lwp->lwp_cursig = 0;
1931 lwp->lwp_extsig = 0;
1932 if (lwp->lwp_curinfo) {
1933 siginfofree(lwp->lwp_curinfo);
1934 lwp->lwp_curinfo = NULL;
1935 }
1936
1937 return (0);
1938 }
1939
1940 static int
1941 pr_clearflt(prnode_t *pnp)
1942 {
1943 kthread_t *t = pr_thread(pnp); /* returns locked thread */
1944
1945 thread_unlock(t);
1946 ttolwp(t)->lwp_curflt = 0;
1947
1948 return (0);
1949 }
1950
/*
 * PCWATCH operation: set (pr_wflags != 0) or clear (pr_wflags == 0)
 * a watched area in the target's address space.  The whole process
 * is brought to a stop while the watched-area lists are changed.
 * On return, *unlocked != 0 means the process lock was dropped and
 * could not be reacquired; the caller must not prunlock() again.
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	struct as *as = p->p_as;
	uintptr_t vaddr = pwp->pr_vaddr;
	size_t size = pwp->pr_size;
	int wflags = pwp->pr_wflags;
	ulong_t newpage = 0;
	struct watched_area *pwa;
	int error;

	*unlocked = 0;

	/*
	 * Can't apply to a system process.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		return (EBUSY);

	/*
	 * Verify that the address range does not wrap
	 * and that only the proper flags were specified.
	 */
	if ((wflags & ~WA_TRAPAFTER) == 0)
		size = 0;
	if (vaddr + size < vaddr ||
	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
		return (EINVAL);

	/*
	 * Don't let the address range go above as->a_userlimit.
	 * There is no error here, just a limitation.
	 */
	if (vaddr >= (uintptr_t)as->a_userlimit)
		return (0);
	if (vaddr + size > (uintptr_t)as->a_userlimit)
		size = (uintptr_t)as->a_userlimit - vaddr;

	/*
	 * Compute maximum number of pages this will add.
	 */
	if ((wflags & ~WA_TRAPAFTER) != 0) {
		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
		newpage = btopr(pagespan);
		if (newpage > 2 * prnwatch)
			return (E2BIG);
	}

	/*
	 * Force the process to be fully stopped.
	 */
	if (p == curproc) {
		/* operating on ourself: hold our own lwps instead */
		prunlock(pnp);
		while (holdwatch() != 0)
			continue;
		if ((error = prlock(pnp, ZNO)) != 0) {
			continuelwps(p);
			*unlocked = 1;
			return (error);
		}
	} else {
		pauselwps(p);
		while (pr_allstopped(p, 0) > 0) {
			/*
			 * This cv/mutex pair is persistent even
			 * if the process disappears after we
			 * unmark it and drop p->p_lock.
			 */
			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
			kmutex_t *mp = &p->p_lock;

			prunmark(p);
			(void) cv_wait(cv, mp);
			mutex_exit(mp);
			if ((error = prlock(pnp, ZNO)) != 0) {
				/*
				 * Unpause the process if it exists.
				 */
				p = pr_p_lock(pnp);
				mutex_exit(&pr_pidlock);
				if (p != NULL) {
					unpauselwps(p);
					prunlock(pnp);
				}
				*unlocked = 1;
				return (error);
			}
		}
	}

	/*
	 * Drop p->p_lock in order to perform the rest of this.
	 * The process is still locked with the P_PR_LOCK flag.
	 */
	mutex_exit(&p->p_lock);

	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
	pwa->wa_vaddr = (caddr_t)vaddr;
	pwa->wa_eaddr = (caddr_t)vaddr + size;
	pwa->wa_flags = (ulong_t)wflags;

	/* empty flags (modulo WA_TRAPAFTER) means clear, else set */
	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);

	if (p == curproc) {
		setallwatch();
		mutex_enter(&p->p_lock);
		continuelwps(p);
	} else {
		mutex_enter(&p->p_lock);
		unpauselwps(p);
	}

	return (error);
}
2068
/*
 * JDSTOPPED(t): the thread is jobcontrol stopped, but a /proc
 * directed stop (TP_PRSTOP) is in effect, so it will stop for
 * /proc as soon as it is set running again.
 */
#define	JDSTOPPED(t)	\
	((t)->t_state == TS_STOPPED && \
	(t)->t_whystop == PR_JOBCONTROL && \
	((t)->t_proc_flag & TP_PRSTOP))
2074
2075 /*
2076 * pr_agent() creates the agent lwp. If the process is exiting while
2077 * we are creating an agent lwp, then exitlwps() waits until the
2078 * agent has been created using prbarrier().
2079 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
	proc_t *p = pnp->pr_common->prc_proc;
	prcommon_t *pcp;
	kthread_t *t;
	kthread_t *ct;
	klwp_t *clwp;
	k_sigset_t smask;
	int cid;
	void *bufp = NULL;
	int error;

	*unlocked = 0;

	/*
	 * Cannot create the /proc agent lwp if :-
	 * - the process is not fully stopped or directed to stop.
	 * - there is an agent lwp already.
	 * - the process has been killed.
	 * - the process is exiting.
	 * - it's a vfork(2) parent.
	 */
	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);

	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
	    p->p_agenttp != NULL ||
	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
		thread_unlock(t);
		return (EBUSY);
	}

	thread_unlock(t);
	mutex_exit(&p->p_lock);

	/* the agent starts with all catchable signals held */
	sigfillset(&smask);
	sigdiffset(&smask, &cantmask);
	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
	    t->t_pri, &smask, NOCLASS, 0);
	if (clwp == NULL) {
		mutex_enter(&p->p_lock);
		return (ENOMEM);
	}
	/* install the caller-supplied register values in the agent */
	prsetprregs(clwp, prgregset, 1);

	/*
	 * Because abandoning the agent inside the target process leads to
	 * a state that is essentially undebuggable, we record the psinfo of
	 * the process creating the agent and hang that off of the lwp.
	 */
	clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
	mutex_enter(&curproc->p_lock);
	prgetpsinfo(curproc, clwp->lwp_spymaster);
	mutex_exit(&curproc->p_lock);

	/*
	 * We overload pr_time in the spymaster to denote the time at which the
	 * agent was created.
	 */
	gethrestime(&clwp->lwp_spymaster->pr_time);

retry:
	/*
	 * Pre-allocate the scheduling-class data without holding
	 * p_lock (CL_ALLOC may sleep), then verify the class did
	 * not change underneath us before using it.
	 */
	cid = t->t_cid;
	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
	mutex_enter(&p->p_lock);
	if (cid != t->t_cid) {
		/*
		 * Someone just changed this thread's scheduling class,
		 * so try pre-allocating the buffer again.  Hopefully we
		 * don't hit this often.
		 */
		mutex_exit(&p->p_lock);
		CL_FREE(cid, bufp);
		goto retry;
	}

	clwp->lwp_ap = clwp->lwp_arg;
	clwp->lwp_eosys = NORMALRETURN;
	ct = lwptot(clwp);
	ct->t_clfuncs = t->t_clfuncs;
	CL_FORK(t, ct, bufp);
	ct->t_cid = t->t_cid;
	ct->t_proc_flag |= TP_PRSTOP;
	/*
	 * Setting t_sysnum to zero causes post_syscall()
	 * to bypass all syscall checks and go directly to
	 *	if (issig()) psig();
	 * so that the agent lwp will stop in issig_forreal()
	 * showing PR_REQUESTED.
	 */
	ct->t_sysnum = 0;
	ct->t_post_sys = 1;
	ct->t_sig_check = 1;
	p->p_agenttp = ct;
	ct->t_proc_flag &= ~TP_HOLDLWP;

	pcp = pnp->pr_pcommon;
	mutex_enter(&pcp->prc_mutex);

	lwp_create_done(ct);

	/*
	 * Don't return until the agent is stopped on PR_REQUESTED.
	 */

	for (;;) {
		prunlock(pnp);
		*unlocked = 1;

		/*
		 * Wait for the agent to stop and notify us.
		 * If we've been interrupted, return that information.
		 */
		error = pr_wait(pcp, NULL, 0);
		if (error == EINTR) {
			error = 0;
			break;
		}

		/*
		 * Confirm that the agent LWP has stopped.
		 */

		if ((error = prlock(pnp, ZNO)) != 0)
			break;
		*unlocked = 0;

		/*
		 * Since we dropped the lock on the process, the agent
		 * may have disappeared or changed. Grab the current
		 * agent and check fail if it has disappeared.
		 */
		if ((ct = p->p_agenttp) == NULL) {
			error = ENOENT;
			break;
		}

		mutex_enter(&pcp->prc_mutex);
		thread_lock(ct);

		if (ISTOPPED(ct)) {
			thread_unlock(ct);
			mutex_exit(&pcp->prc_mutex);
			break;
		}

		thread_unlock(ct);
	}

	/*
	 * NOTE(review): when error is 0 this returns -1 -- the same
	 * distinguished non-errno value pr_wait() uses for timeouts --
	 * which the caller appears to treat specially rather than as a
	 * failure; confirm against pr_control()'s handling of negative
	 * returns before changing this.
	 */
	return (error ? error : -1);
}
2232
2233 static int
2234 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2235 {
2236 caddr_t base = (caddr_t)pio->pio_base;
2237 size_t cnt = pio->pio_len;
2238 uintptr_t offset = (uintptr_t)pio->pio_offset;
2239 struct uio auio;
2240 struct iovec aiov;
2241 int error = 0;
2242
2243 if ((p->p_flag & SSYS) || p->p_as == &kas)
2244 error = EIO;
2245 else if ((base + cnt) < base || (offset + cnt) < offset)
2246 error = EINVAL;
2247 else if (cnt != 0) {
2248 aiov.iov_base = base;
2249 aiov.iov_len = cnt;
2250
2251 auio.uio_loffset = offset;
2252 auio.uio_iov = &aiov;
2253 auio.uio_iovcnt = 1;
2254 auio.uio_resid = cnt;
2255 auio.uio_segflg = UIO_USERSPACE;
2256 auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2257 auio.uio_fmode = FREAD|FWRITE;
2258 auio.uio_extflg = UIO_COPY_DEFAULT;
2259
2260 mutex_exit(&p->p_lock);
2261 error = prusrio(p, rw, &auio, 0);
2262 mutex_enter(&p->p_lock);
2263
2264 /*
2265 * We have no way to return the i/o count,
2266 * like read() or write() would do, so we
2267 * return an error if the i/o was truncated.
2268 */
2269 if (auio.uio_resid != 0 && error == 0)
2270 error = EIO;
2271 }
2272
2273 return (error);
2274 }
2275
/*
 * Set the credentials of the target process.  'prcred' supplies the
 * new real/effective/saved uids and gids; when dogrps is B_TRUE the
 * supplementary group list in prcred->pr_groups replaces the process's
 * groups as well.  'cr' is the credential of the controlling process,
 * used for validation and privilege checks.
 *
 * Called with p->p_lock held; the lock is dropped while the new
 * credential is constructed and reacquired before returning.
 * Returns 0 on success or an errno value.
 */
static int
pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	uid_t oldruid;
	int error;
	zone_t *zone = crgetzone(cr);

	/* Every id being set must be valid in the controlling zone. */
	if (!VALID_UID(prcred->pr_euid, zone) ||
	    !VALID_UID(prcred->pr_ruid, zone) ||
	    !VALID_UID(prcred->pr_suid, zone) ||
	    !VALID_GID(prcred->pr_egid, zone) ||
	    !VALID_GID(prcred->pr_rgid, zone) ||
	    !VALID_GID(prcred->pr_sgid, zone))
		return (EINVAL);

	if (dogrps) {
		int ngrp = prcred->pr_ngroups;
		int i;

		if (ngrp < 0 || ngrp > ngroups_max)
			return (EINVAL);

		for (i = 0; i < ngrp; i++) {
			if (!VALID_GID(prcred->pr_groups[i], zone))
				return (EINVAL);
		}
	}

	/*
	 * The controlling process needs setid privilege for each
	 * distinct uid being set; identical ids are checked only once.
	 */
	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);

	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);

	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
	    prcred->pr_suid != prcred->pr_ruid)
		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);

	if (error)
		return (error);

	mutex_exit(&p->p_lock);

	/* hold old cred so it doesn't disappear while we dup it */
	mutex_enter(&p->p_crlock);
	crhold(oldcred = p->p_cred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldruid = crgetruid(oldcred);
	crfree(oldcred);

	/* Error checking done above */
	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
	    prcred->pr_suid);
	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
	    prcred->pr_sgid);

	if (dogrps) {
		(void) crsetgroups(newcred, prcred->pr_ngroups,
		    prcred->pr_groups);

	}

	/* Swap the new credential into place and release the old one. */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * Keep count of processes per uid consistent.
	 */
	if (oldruid != prcred->pr_ruid) {
		zoneid_t zoneid = crgetzoneid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(oldruid, zoneid);
		upcount_inc(prcred->pr_ruid, zoneid);
		mutex_exit(&pidlock);
	}

	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1; /* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2370
2371 /*
2372 * Change process credentials to specified zone. Used to temporarily
2373 * set a process to run in the global zone; only transitions between
2374 * the process's actual zone and the global zone are allowed.
2375 */
static int
pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
{
	kthread_t *t;
	cred_t *oldcred;
	cred_t *newcred;
	zone_t *zptr;
	zoneid_t oldzoneid;

	/* The controlling process needs zone configuration privilege. */
	if (secpolicy_zone_config(cr) != 0)
		return (EPERM);
	/* Only global-zone <-> actual-zone transitions are allowed. */
	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
		return (EINVAL);
	/*
	 * We cannot hold p_lock when we call zone_find_by_id since that can
	 * lead to a deadlock. zone_find_by_id() takes zonehash_lock.
	 * zone_enter() can hold the zonehash_lock and needs p_lock when it
	 * calls task_join.
	 */
	mutex_exit(&p->p_lock);
	if ((zptr = zone_find_by_id(zoneid)) == NULL) {
		mutex_enter(&p->p_lock);
		return (EINVAL);
	}
	/* Duplicate the current cred; the copy will carry the new zone. */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	crhold(oldcred);
	mutex_exit(&p->p_crlock);
	newcred = crdup(oldcred);
	oldzoneid = crgetzoneid(oldcred);
	crfree(oldcred);

	crsetzone(newcred, zptr);
	zone_rele(zptr);	/* drop the hold from zone_find_by_id() */

	/* Swap the rezoned credential into place; free the old one. */
	mutex_enter(&p->p_crlock);
	oldcred = p->p_cred;
	p->p_cred = newcred;
	mutex_exit(&p->p_crlock);
	crfree(oldcred);

	/*
	 * The target process is changing zones (according to its cred), so
	 * update the per-zone upcounts, which are based on process creds.
	 */
	if (oldzoneid != zoneid) {
		uid_t ruid = crgetruid(newcred);

		mutex_enter(&pidlock);
		upcount_dec(ruid, oldzoneid);
		upcount_inc(ruid, zoneid);
		mutex_exit(&pidlock);
	}
	/*
	 * Broadcast the cred change to the threads.
	 */
	mutex_enter(&p->p_lock);
	t = p->p_tlist;
	do {
		t->t_pre_sys = 1; /* so syscall will get new cred */
	} while ((t = t->t_forw) != p->p_tlist);

	return (0);
}
2440
2441 static int
2442 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2443 {
2444 kthread_t *t;
2445 int err;
2446
2447 ASSERT(MUTEX_HELD(&p->p_lock));
2448
2449 if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2450 /*
2451 * Broadcast the cred change to the threads.
2452 */
2453 t = p->p_tlist;
2454 do {
2455 t->t_pre_sys = 1; /* so syscall will get new cred */
2456 } while ((t = t->t_forw) != p->p_tlist);
2457 }
2458
2459 return (err);
2460 }
2461
2462 /*
2463 * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2464 * terminate or perform an exec(2).
2465 *
2466 * Returns 0 if the process is fully stopped except for the current thread (if
2467 * we are operating on our own process), 1 otherwise.
2468 *
2469 * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2470 * See holdwatch() for details.
2471 */
2472 int
2473 pr_allstopped(proc_t *p, int watchstop)
2474 {
2475 kthread_t *t;
2476 int rv = 0;
2477
2478 ASSERT(MUTEX_HELD(&p->p_lock));
2479
2480 if (p->p_flag & SVFWAIT) /* waiting for vfork'd child to exec */
2481 return (-1);
2482
2483 if ((t = p->p_tlist) != NULL) {
2484 do {
2485 if (t == curthread || VSTOPPED(t) ||
2486 (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2487 continue;
2488 thread_lock(t);
2489 switch (t->t_state) {
2490 case TS_ZOMB:
2491 case TS_STOPPED:
2492 break;
2493 case TS_SLEEP:
2494 if (!(t->t_flag & T_WAKEABLE) ||
2495 t->t_wchan0 == NULL)
2496 rv = 1;
2497 break;
2498 default:
2499 rv = 1;
2500 break;
2501 }
2502 thread_unlock(t);
2503 } while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2504 }
2505
2506 return (rv);
2507 }
2508
2509 /*
2510 * Cause all lwps in the process to pause (for watchpoint operations).
2511 */
2512 static void
2513 pauselwps(proc_t *p)
2514 {
2515 kthread_t *t;
2516
2517 ASSERT(MUTEX_HELD(&p->p_lock));
2518 ASSERT(p != curproc);
2519
2520 if ((t = p->p_tlist) != NULL) {
2521 do {
2522 thread_lock(t);
2523 t->t_proc_flag |= TP_PAUSE;
2524 aston(t);
2525 if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2526 ISWAITING(t)) {
2527 setrun_locked(t);
2528 }
2529 prpokethread(t);
2530 thread_unlock(t);
2531 } while ((t = t->t_forw) != p->p_tlist);
2532 }
2533 }
2534
2535 /*
2536 * undo the effects of pauselwps()
2537 */
2538 static void
2539 unpauselwps(proc_t *p)
2540 {
2541 kthread_t *t;
2542
2543 ASSERT(MUTEX_HELD(&p->p_lock));
2544 ASSERT(p != curproc);
2545
2546 if ((t = p->p_tlist) != NULL) {
2547 do {
2548 thread_lock(t);
2549 t->t_proc_flag &= ~TP_PAUSE;
2550 if (t->t_state == TS_STOPPED) {
2551 t->t_schedflag |= TS_UNPAUSE;
2552 t->t_dtrace_stop = 0;
2553 setrun_locked(t);
2554 }
2555 thread_unlock(t);
2556 } while ((t = t->t_forw) != p->p_tlist);
2557 }
2558 }
2559
2560 /*
2561 * Cancel all watched areas. Called from prclose().
2562 */
proc_t *
pr_cancel_watch(prnode_t *pnp)
{
	proc_t *p = pnp->pr_pcommon->prc_proc;
	struct as *as;
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	/* Nothing to do if no watchpoints are active. */
	if (!pr_watch_active(p))
		return (p);

	/*
	 * Pause the process before dealing with the watchpoints.
	 */
	if (p == curproc) {
		/* Operating on ourself: use the holdwatch() protocol. */
		prunlock(pnp);
		while (holdwatch() != 0)
			continue;
		p = pr_p_lock(pnp);
		mutex_exit(&pr_pidlock);
		ASSERT(p == curproc);
	} else {
		/* Pause all lwps, then wait until they have all stopped. */
		pauselwps(p);
		while (p != NULL && pr_allstopped(p, 0) > 0) {
			/*
			 * This cv/mutex pair is persistent even
			 * if the process disappears after we
			 * unmark it and drop p->p_lock.
			 */
			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
			kmutex_t *mp = &p->p_lock;

			prunmark(p);
			(void) cv_wait(cv, mp);
			mutex_exit(mp);
			p = pr_p_lock(pnp); /* NULL if process disappeared */
			mutex_exit(&pr_pidlock);
		}
	}

	if (p == NULL) /* the process disappeared */
		return (NULL);

	/* We hold the process again; re-verify our locking state. */
	ASSERT(p == pnp->pr_pcommon->prc_proc);
	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	/*
	 * Free the watchpoint list and disable watching in every lwp.
	 * Re-checked because the process may have changed while unlocked.
	 */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);

			} while ((t = t->t_forw) != p->p_tlist);
		}
	}

	if ((as = p->p_as) != NULL) {
		avl_tree_t *tree;
		struct watched_page *pwp;

		/*
		 * If this is the parent of a vfork, the watched page
		 * list has been moved temporarily to p->p_wpage.
		 */
		if (avl_numnodes(&p->p_wpage) != 0)
			tree = &p->p_wpage;
		else
			tree = &as->a_wpage;

		mutex_exit(&p->p_lock);
		AS_LOCK_ENTER(as, RW_WRITER);

		/*
		 * Clear every watched page and queue each one whose
		 * protections need restoring onto p->p_wprot.
		 */
		for (pwp = avl_first(tree); pwp != NULL;
		    pwp = AVL_NEXT(tree, pwp)) {
			pwp->wp_read = 0;
			pwp->wp_write = 0;
			pwp->wp_exec = 0;
			if ((pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_prot = pwp->wp_oprot;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
		}

		AS_LOCK_EXIT(as);
		mutex_enter(&p->p_lock);
	}

	/*
	 * Unpause the process now.
	 */
	if (p == curproc)
		continuelwps(p);
	else
		unpauselwps(p);

	return (p);
}