/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016, Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * with the dispatcher lock held for that thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
			case PR_BRAND:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
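
/*
 * An illustrative (hypothetical) prchoose() caller, sketched here for
 * exposition only.  The caller must hold p->p_lock, and must drop the
 * thread lock that prchoose() returns holding:
 *
 *	kthread_t *t;
 *
 *	ASSERT(MUTEX_HELD(&p->p_lock));
 *	if ((t = prchoose(p)) != NULL) {
 *		... examine the chosen lwp ...
 *		thread_unlock(t);
 *	}
 *
 * See prgetstatus() below for a real instance of this pattern.
 */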

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beaten it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}

	/*
	 * There may be threads waiting for the flag change blocked behind the
	 * pr_pid_cv as well.
	 */
	cv_signal(&pr_pid_cv[p->p_slot]);
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);	/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);	/* signals */
		premptyset(&p->p_fltmask);	/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
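
/*
 * Sketch of the canonical prlock()/prunlock() pairing used by the /proc
 * vnode operations (a hypothetical caller, for illustration only):
 *
 *	int error;
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);
 *	... operate on pnp->pr_common->prc_proc with p->p_lock held ...
 *	prunlock(pnp);
 */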

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	THREAD_KPRI_RELEASE();
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
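
/*
 * The usual prbarrier() call sequence, sketched for illustration (see
 * prrelvm() and prinvalidate() above for real examples):
 *
 *	mutex_enter(&p->p_lock);
 *	prbarrier(p);		wait until /proc unlocks the process
 *	... act on the process ...
 *	mutex_exit(&p->p_lock);
 */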

/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
int
prnsegs(struct as *as, int reserved)
{
	int n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr)
				n++;
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}
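
/*
 * A note on the pr_getprot() iteration pattern above, which recurs
 * throughout this file: each call advances [saddr, naddr) to the next
 * run of addresses in the segment with uniform protections, and the
 * opaque cookie 'tmp' carries temporary state that pr_getprot() is
 * expected to release by the time the walk reaches eaddr -- hence the
 * ASSERT that it is NULL when the inner loop completes.
 */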

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}
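
/*
 * For example, pr_u32tos(305, buf, 8) stores "305" followed by five '\0'
 * pad bytes in buf and returns 3.  With len == 0 no padding is done and
 * no terminating '\0' is stored, as in the pr_object_name() calls below.
 */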

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];	/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

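/*
 * Construct the name of an entry in the /proc "object" directory:
 * "<vfs name>.<fsid major>.<fsid minor>.<nodeid>", for example
 * "ufs.136.7.9553" (made-up values).  The caller must supply a
 * buffer large enough to hold the result.
 */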
void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
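
/*
 * Sketch of the chained-buffer lifecycle (a hypothetical caller, for
 * illustration; see prgetmap() below for a real one):
 *
 *	list_t head;
 *	prmap_t *mp;
 *	caddr_t tgt = ...user address...;
 *	int error = 0;
 *
 *	pr_iol_initlist(&head, sizeof (*mp), nitems);
 *	while (...more items...) {
 *		mp = pr_iol_newbuf(&head, sizeof (*mp));
 *		...fill in *mp...
 *	}
 *	error = pr_iol_copyout_and_free(&head, &tgt, error);
 */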

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t *iol;
	size_t initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t *iol;
	char *new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t off = uiop->uio_offset;
	char *base;
	size_t size;
	piol_t *iol;
	int error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return the size of the /proc page data file.
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Read page data information.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
1961 * mapped files whose sizes are changing.
1962 * page_exists() may be called indirectly from
1963 * pr_getprot() by a SEGOP_INCORE() routine.
1964 * If this happens we need to make sure we don't
1965 * overrun the buffer whose size we computed based
1966 * on the initial iteration through the segments.
1967 * Once we've detected an overflow, we need to clean
1968 * up the temporary memory allocated in pr_getprot()
1969 * and retry. If there's a pending signal, we return
1970 * EINTR so that this thread can be dislodged if
1971 * a latent bug causes us to spin indefinitely.
1972 */
1973 if (next > (uintptr_t)buf + size) {
1974 pr_getprot_done(&tmp);
1975 AS_LOCK_EXIT(as);
1976
1977 kmem_free(buf, size);
1978
1979 if (ISSIG(curthread, JUSTLOOKING))
1980 return (EINTR);
1981
1982 goto again;
1983 }
1984
1985 php->pr_nmap++;
1986 php->pr_npage += npage;
1987 pmp->pr_vaddr = (uintptr_t)saddr;
1988 pmp->pr_npage = npage;
1989 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1990 pmp->pr_mflags = 0;
1991 if (prot & PROT_READ)
1992 pmp->pr_mflags |= MA_READ;
1993 if (prot & PROT_WRITE)
1994 pmp->pr_mflags |= MA_WRITE;
1995 if (prot & PROT_EXEC)
1996 pmp->pr_mflags |= MA_EXEC;
1997 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1998 pmp->pr_mflags |= MA_SHARED;
1999 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2000 pmp->pr_mflags |= MA_NORESERVE;
2001 if (seg->s_ops == &segspt_shmops ||
2002 (seg->s_ops == &segvn_ops &&
2003 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2004 pmp->pr_mflags |= MA_ANON;
2005 if (seg->s_ops == &segspt_shmops)
2006 pmp->pr_mflags |= MA_ISM | MA_SHM;
2007 pmp->pr_pagesize = PAGESIZE;
2008 /*
2009 * Manufacture a filename for the "object" directory.
2010 */
2011 vattr.va_mask = AT_FSID|AT_NODEID;
2012 if (seg->s_ops == &segvn_ops &&
2013 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2014 vp != NULL && vp->v_type == VREG &&
2015 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2016 if (vp == p->p_exec)
2017 (void) strcpy(pmp->pr_mapname, "a.out");
2018 else
2019 pr_object_name(pmp->pr_mapname,
2020 vp, &vattr);
2021 }
2022
2023 /*
2024 * Get the SysV shared memory id, if any.
2025 */
2026 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2027 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2028 SHMID_NONE) {
2029 if (pmp->pr_shmid == SHMID_FREE)
2030 pmp->pr_shmid = -1;
2031
2032 pmp->pr_mflags |= MA_SHM;
2033 } else {
2034 pmp->pr_shmid = -1;
2035 }
2036
2037 hat_getstat(as, saddr, len, hatid,
2038 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2039 pmp = (prasmap_t *)next;
2040 }
2041 ASSERT(tmp == NULL);
2042 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2043
2044 AS_LOCK_EXIT(as);
2045
2046 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2047 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2048 kmem_free(buf, size);
2049
2050 return (error);
2051 }
2052
2053 #ifdef _SYSCALL32_IMPL
2054 int
2055 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2056 {
2057 struct as *as = p->p_as;
2058 caddr_t buf;
2059 size_t size;
2060 prpageheader32_t *php;
2061 prasmap32_t *pmp;
2062 struct seg *seg;
2063 int error;
2064
2065 again:
2066 AS_LOCK_ENTER(as, RW_WRITER);
2067
2068 if ((seg = AS_SEGFIRST(as)) == NULL) {
2069 AS_LOCK_EXIT(as);
2070 return (0);
2071 }
2072 size = prpdsize32(as);
2073 if (uiop->uio_resid < size) {
2074 AS_LOCK_EXIT(as);
2075 return (E2BIG);
2076 }
2077
2078 buf = kmem_zalloc(size, KM_SLEEP);
2079 php = (prpageheader32_t *)buf;
2080 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2081
2082 hrt2ts32(gethrtime(), &php->pr_tstamp);
2083 php->pr_nmap = 0;
2084 php->pr_npage = 0;
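	/*
	 * The buffer layout matches prpdread() above, using the
	 * 32-bit structures.
	 */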
2085 do {
2086 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2087 caddr_t saddr, naddr;
2088 void *tmp = NULL;
2089
2090 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2091 struct vnode *vp;
2092 struct vattr vattr;
2093 size_t len;
2094 size_t npage;
2095 uint_t prot;
2096 uintptr_t next;
2097
2098 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2099 if ((len = (size_t)(naddr - saddr)) == 0)
2100 continue;
2101 npage = len / PAGESIZE;
2102 next = (uintptr_t)(pmp + 1) + round8(npage);
2103 /*
2104 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
2108 * mapped files whose sizes are changing.
2109 * page_exists() may be called indirectly from
2110 * pr_getprot() by a SEGOP_INCORE() routine.
2111 * If this happens we need to make sure we don't
2112 * overrun the buffer whose size we computed based
2113 * on the initial iteration through the segments.
2114 * Once we've detected an overflow, we need to clean
2115 * up the temporary memory allocated in pr_getprot()
2116 * and retry. If there's a pending signal, we return
2117 * EINTR so that this thread can be dislodged if
2118 * a latent bug causes us to spin indefinitely.
2119 */
2120 if (next > (uintptr_t)buf + size) {
2121 pr_getprot_done(&tmp);
2122 AS_LOCK_EXIT(as);
2123
2124 kmem_free(buf, size);
2125
2126 if (ISSIG(curthread, JUSTLOOKING))
2127 return (EINTR);
2128
2129 goto again;
2130 }
2131
2132 php->pr_nmap++;
2133 php->pr_npage += npage;
2134 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2135 pmp->pr_npage = (size32_t)npage;
2136 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2137 pmp->pr_mflags = 0;
2138 if (prot & PROT_READ)
2139 pmp->pr_mflags |= MA_READ;
2140 if (prot & PROT_WRITE)
2141 pmp->pr_mflags |= MA_WRITE;
2142 if (prot & PROT_EXEC)
2143 pmp->pr_mflags |= MA_EXEC;
2144 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2145 pmp->pr_mflags |= MA_SHARED;
2146 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2147 pmp->pr_mflags |= MA_NORESERVE;
2148 if (seg->s_ops == &segspt_shmops ||
2149 (seg->s_ops == &segvn_ops &&
2150 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2151 pmp->pr_mflags |= MA_ANON;
2152 if (seg->s_ops == &segspt_shmops)
2153 pmp->pr_mflags |= MA_ISM | MA_SHM;
2154 pmp->pr_pagesize = PAGESIZE;
2155 /*
2156 * Manufacture a filename for the "object" directory.
2157 */
2158 vattr.va_mask = AT_FSID|AT_NODEID;
2159 if (seg->s_ops == &segvn_ops &&
2160 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2161 vp != NULL && vp->v_type == VREG &&
2162 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2163 if (vp == p->p_exec)
2164 (void) strcpy(pmp->pr_mapname, "a.out");
2165 else
2166 pr_object_name(pmp->pr_mapname,
2167 vp, &vattr);
2168 }
2169
2170 /*
2171 * Get the SysV shared memory id, if any.
2172 */
2173 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2174 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2175 SHMID_NONE) {
2176 if (pmp->pr_shmid == SHMID_FREE)
2177 pmp->pr_shmid = -1;
2178
2179 pmp->pr_mflags |= MA_SHM;
2180 } else {
2181 pmp->pr_shmid = -1;
2182 }
2183
2184 hat_getstat(as, saddr, len, hatid,
2185 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2186 pmp = (prasmap32_t *)next;
2187 }
2188 ASSERT(tmp == NULL);
2189 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2190
2191 AS_LOCK_EXIT(as);
2192
2193 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2194 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2195 kmem_free(buf, size);
2196
2197 return (error);
2198 }
2199 #endif /* _SYSCALL32_IMPL */
2200
2201 ushort_t
2202 prgetpctcpu(uint64_t pct)
2203 {
2204 /*
2205 * The value returned will be relevant in the zone of the examiner,
2206 * which may not be the same as the zone which performed the procfs
2207 * mount.
2208 */
2209 int nonline = zone_ncpus_online_get(curproc->p_zone);
2210
2211 /*
2212 * Prorate over online cpus so we don't exceed 100%
2213 */
2214 if (nonline > 1)
2215 pct /= nonline;
2216 pct >>= 16; /* convert to 16-bit scaled integer */
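	/*
	 * The result is a binary fraction with 0x8000 representing 100%;
	 * e.g., a value of 0x4000 corresponds to 50%.  Consumers such as
	 * prgetlwpsinfo() rely on this scaling when deriving pr_cpu.
	 */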
2217 if (pct > 0x8000) /* might happen, due to rounding */
2218 pct = 0x8000;
2219 return ((ushort_t)pct);
2220 }
2221
2222 /*
2223 * Return information used by ps(1).
2224 */
2225 void
2226 prgetpsinfo(proc_t *p, psinfo_t *psp)
2227 {
2228 kthread_t *t;
2229 struct cred *cred;
2230 hrtime_t hrutime, hrstime;
2231
2232 ASSERT(MUTEX_HELD(&p->p_lock));
2233
2234 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2235 bzero(psp, sizeof (*psp));
2236 else {
2237 thread_unlock(t);
2238 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2239 }
2240
2241 /*
2242 * only export SSYS and SMSACCT; everything else is off-limits to
2243 * userland apps.
2244 */
2245 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2246 psp->pr_nlwp = p->p_lwpcnt;
2247 psp->pr_nzomb = p->p_zombcnt;
2248 mutex_enter(&p->p_crlock);
2249 cred = p->p_cred;
2250 psp->pr_uid = crgetruid(cred);
2251 psp->pr_euid = crgetuid(cred);
2252 psp->pr_gid = crgetrgid(cred);
2253 psp->pr_egid = crgetgid(cred);
2254 mutex_exit(&p->p_crlock);
2255 psp->pr_pid = p->p_pid;
2256 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2257 (p->p_flag & SZONETOP)) {
2258 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as the parent pid
		 * for processes whose parent lies outside of the zone.
		 */
2263 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2264 } else {
2265 psp->pr_ppid = p->p_ppid;
2266 }
2267 psp->pr_pgid = p->p_pgrp;
2268 psp->pr_sid = p->p_sessp->s_sid;
2269 psp->pr_taskid = p->p_task->tk_tkid;
2270 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2271 psp->pr_poolid = p->p_pool->pool_id;
2272 psp->pr_zoneid = p->p_zone->zone_id;
2273 if ((psp->pr_contract = PRCTID(p)) == 0)
2274 psp->pr_contract = -1;
2275 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2276 switch (p->p_model) {
2277 case DATAMODEL_ILP32:
2278 psp->pr_dmodel = PR_MODEL_ILP32;
2279 break;
2280 case DATAMODEL_LP64:
2281 psp->pr_dmodel = PR_MODEL_LP64;
2282 break;
2283 }
2284 hrutime = mstate_aggr_state(p, LMS_USER);
2285 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2286 hrt2ts((hrutime + hrstime), &psp->pr_time);
2287 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2288
2289 if (t == NULL) {
2290 int wcode = p->p_wcode; /* must be atomic read */
2291
2292 if (wcode)
2293 psp->pr_wstat = wstat(wcode, p->p_wdata);
2294 psp->pr_ttydev = PRNODEV;
2295 psp->pr_lwp.pr_state = SZOMB;
2296 psp->pr_lwp.pr_sname = 'Z';
2297 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2298 psp->pr_lwp.pr_bindpset = PS_NONE;
2299 } else {
2300 user_t *up = PTOU(p);
2301 struct as *as;
2302 dev_t d;
2303 extern dev_t rwsconsdev, rconsdev, uconsdev;
2304
2305 d = cttydev(p);
2306 /*
2307 * If the controlling terminal is the real
2308 * or workstation console device, map to what the
2309 * user thinks is the console device. Handle case when
2310 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2311 */
2312 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2313 d = uconsdev;
2314 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2315 psp->pr_start = up->u_start;
2316 bcopy(up->u_comm, psp->pr_fname,
2317 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2318 bcopy(up->u_psargs, psp->pr_psargs,
2319 MIN(PRARGSZ-1, PSARGSZ));
2320 psp->pr_argc = up->u_argc;
2321 psp->pr_argv = up->u_argv;
2322 psp->pr_envp = up->u_envp;
2323
2324 /* get the chosen lwp's lwpsinfo */
2325 prgetlwpsinfo(t, &psp->pr_lwp);
2326
2327 /* compute %cpu for the process */
2328 if (p->p_lwpcnt == 1)
2329 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2330 else {
2331 uint64_t pct = 0;
2332 hrtime_t cur_time = gethrtime_unscaled();
2333
2334 t = p->p_tlist;
2335 do {
2336 pct += cpu_update_pct(t, cur_time);
2337 } while ((t = t->t_forw) != p->p_tlist);
2338
2339 psp->pr_pctcpu = prgetpctcpu(pct);
2340 }
2341 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2342 psp->pr_size = 0;
2343 psp->pr_rssize = 0;
2344 } else {
2345 mutex_exit(&p->p_lock);
2346 AS_LOCK_ENTER(as, RW_READER);
2347 psp->pr_size = btopr(as->a_resvsize) *
2348 (PAGESIZE / 1024);
2349 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2350 psp->pr_pctmem = rm_pctmemory(as);
2351 AS_LOCK_EXIT(as);
2352 mutex_enter(&p->p_lock);
2353 }
2354 }
2355 }
2356
2357 #ifdef _SYSCALL32_IMPL
2358 void
2359 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2360 {
2361 kthread_t *t;
2362 struct cred *cred;
2363 hrtime_t hrutime, hrstime;
2364
2365 ASSERT(MUTEX_HELD(&p->p_lock));
2366
2367 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2368 bzero(psp, sizeof (*psp));
2369 else {
2370 thread_unlock(t);
2371 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2372 }
2373
2374 /*
2375 * only export SSYS and SMSACCT; everything else is off-limits to
2376 * userland apps.
2377 */
2378 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2379 psp->pr_nlwp = p->p_lwpcnt;
2380 psp->pr_nzomb = p->p_zombcnt;
2381 mutex_enter(&p->p_crlock);
2382 cred = p->p_cred;
2383 psp->pr_uid = crgetruid(cred);
2384 psp->pr_euid = crgetuid(cred);
2385 psp->pr_gid = crgetrgid(cred);
2386 psp->pr_egid = crgetgid(cred);
2387 mutex_exit(&p->p_crlock);
2388 psp->pr_pid = p->p_pid;
2389 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2390 (p->p_flag & SZONETOP)) {
2391 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as the parent pid
		 * for processes whose parent lies outside of the zone.
		 */
2396 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2397 } else {
2398 psp->pr_ppid = p->p_ppid;
2399 }
2400 psp->pr_pgid = p->p_pgrp;
2401 psp->pr_sid = p->p_sessp->s_sid;
2402 psp->pr_taskid = p->p_task->tk_tkid;
2403 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2404 psp->pr_poolid = p->p_pool->pool_id;
2405 psp->pr_zoneid = p->p_zone->zone_id;
2406 if ((psp->pr_contract = PRCTID(p)) == 0)
2407 psp->pr_contract = -1;
2408 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2409 switch (p->p_model) {
2410 case DATAMODEL_ILP32:
2411 psp->pr_dmodel = PR_MODEL_ILP32;
2412 break;
2413 case DATAMODEL_LP64:
2414 psp->pr_dmodel = PR_MODEL_LP64;
2415 break;
2416 }
2417 hrutime = mstate_aggr_state(p, LMS_USER);
2418 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2419 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2420 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2421
2422 if (t == NULL) {
2423 extern int wstat(int, int); /* needs a header file */
2424 int wcode = p->p_wcode; /* must be atomic read */
2425
2426 if (wcode)
2427 psp->pr_wstat = wstat(wcode, p->p_wdata);
2428 psp->pr_ttydev = PRNODEV32;
2429 psp->pr_lwp.pr_state = SZOMB;
2430 psp->pr_lwp.pr_sname = 'Z';
2431 } else {
2432 user_t *up = PTOU(p);
2433 struct as *as;
2434 dev_t d;
2435 extern dev_t rwsconsdev, rconsdev, uconsdev;
2436
2437 d = cttydev(p);
2438 /*
2439 * If the controlling terminal is the real
2440 * or workstation console device, map to what the
2441 * user thinks is the console device. Handle case when
2442 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2443 */
2444 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2445 d = uconsdev;
2446 (void) cmpldev(&psp->pr_ttydev, d);
2447 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2448 bcopy(up->u_comm, psp->pr_fname,
2449 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2450 bcopy(up->u_psargs, psp->pr_psargs,
2451 MIN(PRARGSZ-1, PSARGSZ));
2452 psp->pr_argc = up->u_argc;
2453 psp->pr_argv = (caddr32_t)up->u_argv;
2454 psp->pr_envp = (caddr32_t)up->u_envp;
2455
2456 /* get the chosen lwp's lwpsinfo */
2457 prgetlwpsinfo32(t, &psp->pr_lwp);
2458
2459 /* compute %cpu for the process */
2460 if (p->p_lwpcnt == 1)
2461 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2462 else {
2463 uint64_t pct = 0;
2464 hrtime_t cur_time;
2465
2466 t = p->p_tlist;
2467 cur_time = gethrtime_unscaled();
2468 do {
2469 pct += cpu_update_pct(t, cur_time);
2470 } while ((t = t->t_forw) != p->p_tlist);
2471
2472 psp->pr_pctcpu = prgetpctcpu(pct);
2473 }
2474 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2475 psp->pr_size = 0;
2476 psp->pr_rssize = 0;
2477 } else {
2478 mutex_exit(&p->p_lock);
2479 AS_LOCK_ENTER(as, RW_READER);
2480 psp->pr_size = (size32_t)
2481 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2482 psp->pr_rssize = (size32_t)
2483 (rm_asrss(as) * (PAGESIZE / 1024));
2484 psp->pr_pctmem = rm_pctmemory(as);
2485 AS_LOCK_EXIT(as);
2486 mutex_enter(&p->p_lock);
2487 }
2488 }
2489
2490 /*
2491 * If we are looking at an LP64 process, zero out
2492 * the fields that cannot be represented in ILP32.
2493 */
2494 if (p->p_model != DATAMODEL_ILP32) {
2495 psp->pr_size = 0;
2496 psp->pr_rssize = 0;
2497 psp->pr_argv = 0;
2498 psp->pr_envp = 0;
2499 }
2500 }
2501
2502 #endif /* _SYSCALL32_IMPL */
2503
2504 void
2505 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2506 {
2507 klwp_t *lwp = ttolwp(t);
2508 sobj_ops_t *sobj;
2509 char c, state;
2510 uint64_t pct;
2511 int retval, niceval;
2512 hrtime_t hrutime, hrstime;
2513
2514 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2515
2516 bzero(psp, sizeof (*psp));
2517
2518 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2519 psp->pr_lwpid = t->t_tid;
2520 psp->pr_addr = (uintptr_t)t;
2521 psp->pr_wchan = (uintptr_t)t->t_wchan;
2522
2523 /* map the thread state enum into a process state enum */
2524 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2525 switch (state) {
2526 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2527 case TS_RUN: state = SRUN; c = 'R'; break;
2528 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2529 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2530 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2531 case TS_WAIT: state = SWAIT; c = 'W'; break;
2532 default: state = 0; c = '?'; break;
2533 }
2534 psp->pr_state = state;
2535 psp->pr_sname = c;
2536 if ((sobj = t->t_sobj_ops) != NULL)
2537 psp->pr_stype = SOBJ_TYPE(sobj);
2538 retval = CL_DONICE(t, NULL, 0, &niceval);
2539 if (retval == 0) {
2540 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2541 psp->pr_nice = niceval + NZERO;
2542 }
2543 psp->pr_syscall = t->t_sysnum;
2544 psp->pr_pri = t->t_pri;
2545 psp->pr_start.tv_sec = t->t_start;
2546 psp->pr_start.tv_nsec = 0L;
2547 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2548 scalehrtime(&hrutime);
2549 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2550 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2551 scalehrtime(&hrstime);
2552 hrt2ts(hrutime + hrstime, &psp->pr_time);
2553 /* compute %cpu for the lwp */
2554 pct = cpu_update_pct(t, gethrtime_unscaled());
2555 psp->pr_pctcpu = prgetpctcpu(pct);
2556 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2557 if (psp->pr_cpu > 99)
2558 psp->pr_cpu = 99;
2559
2560 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2561 sizeof (psp->pr_clname) - 1);
2562 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2563 psp->pr_onpro = t->t_cpu->cpu_id;
2564 psp->pr_bindpro = t->t_bind_cpu;
2565 psp->pr_bindpset = t->t_bind_pset;
2566 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2567 }
2568
2569 #ifdef _SYSCALL32_IMPL
2570 void
2571 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2572 {
2573 proc_t *p = ttoproc(t);
2574 klwp_t *lwp = ttolwp(t);
2575 sobj_ops_t *sobj;
2576 char c, state;
2577 uint64_t pct;
2578 int retval, niceval;
2579 hrtime_t hrutime, hrstime;
2580
2581 ASSERT(MUTEX_HELD(&p->p_lock));
2582
2583 bzero(psp, sizeof (*psp));
2584
2585 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2586 psp->pr_lwpid = t->t_tid;
2587 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2588 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
2589
2590 /* map the thread state enum into a process state enum */
2591 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2592 switch (state) {
2593 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2594 case TS_RUN: state = SRUN; c = 'R'; break;
2595 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2596 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2597 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2598 case TS_WAIT: state = SWAIT; c = 'W'; break;
2599 default: state = 0; c = '?'; break;
2600 }
2601 psp->pr_state = state;
2602 psp->pr_sname = c;
2603 if ((sobj = t->t_sobj_ops) != NULL)
2604 psp->pr_stype = SOBJ_TYPE(sobj);
2605 retval = CL_DONICE(t, NULL, 0, &niceval);
2606 if (retval == 0) {
2607 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2608 psp->pr_nice = niceval + NZERO;
2609 } else {
2610 psp->pr_oldpri = 0;
2611 psp->pr_nice = 0;
2612 }
2613 psp->pr_syscall = t->t_sysnum;
2614 psp->pr_pri = t->t_pri;
2615 psp->pr_start.tv_sec = (time32_t)t->t_start;
2616 psp->pr_start.tv_nsec = 0L;
2617 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2618 scalehrtime(&hrutime);
2619 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2620 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2621 scalehrtime(&hrstime);
2622 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2623 /* compute %cpu for the lwp */
2624 pct = cpu_update_pct(t, gethrtime_unscaled());
2625 psp->pr_pctcpu = prgetpctcpu(pct);
2626 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2627 if (psp->pr_cpu > 99)
2628 psp->pr_cpu = 99;
2629
2630 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2631 sizeof (psp->pr_clname) - 1);
2632 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2633 psp->pr_onpro = t->t_cpu->cpu_id;
2634 psp->pr_bindpro = t->t_bind_cpu;
2635 psp->pr_bindpset = t->t_bind_pset;
2636 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2637 }
2638 #endif /* _SYSCALL32_IMPL */
2639
2640 #ifdef _SYSCALL32_IMPL
2641
2642 #define PR_COPY_FIELD(s, d, field) d->field = s->field
2643
2644 #define PR_COPY_FIELD_ILP32(s, d, field) \
2645 if (s->pr_dmodel == PR_MODEL_ILP32) { \
2646 d->field = s->field; \
2647 }
2648
2649 #define PR_COPY_TIMESPEC(s, d, field) \
2650 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
2651
2652 #define PR_COPY_BUF(s, d, field) \
2653 bcopy(s->field, d->field, sizeof (d->field));
2654
2655 #define PR_IGNORE_FIELD(s, d, field)
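/*
 * PR_COPY_FIELD_ILP32 copies a field only when the source process is
 * ILP32; for an LP64 source the destination field is left zero from the
 * initial bzero() of the destination structure, since the value cannot
 * be represented in 32 bits.
 */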
2656
2657 void
2658 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
2659 {
2660 bzero(dest, sizeof (*dest));
2661
2662 PR_COPY_FIELD(src, dest, pr_flag);
2663 PR_COPY_FIELD(src, dest, pr_lwpid);
2664 PR_IGNORE_FIELD(src, dest, pr_addr);
2665 PR_IGNORE_FIELD(src, dest, pr_wchan);
2666 PR_COPY_FIELD(src, dest, pr_stype);
2667 PR_COPY_FIELD(src, dest, pr_state);
2668 PR_COPY_FIELD(src, dest, pr_sname);
2669 PR_COPY_FIELD(src, dest, pr_nice);
2670 PR_COPY_FIELD(src, dest, pr_syscall);
2671 PR_COPY_FIELD(src, dest, pr_oldpri);
2672 PR_COPY_FIELD(src, dest, pr_cpu);
2673 PR_COPY_FIELD(src, dest, pr_pri);
2674 PR_COPY_FIELD(src, dest, pr_pctcpu);
2675 PR_COPY_TIMESPEC(src, dest, pr_start);
2676 PR_COPY_BUF(src, dest, pr_clname);
2677 PR_COPY_BUF(src, dest, pr_name);
2678 PR_COPY_FIELD(src, dest, pr_onpro);
2679 PR_COPY_FIELD(src, dest, pr_bindpro);
2680 PR_COPY_FIELD(src, dest, pr_bindpset);
2681 PR_COPY_FIELD(src, dest, pr_lgrp);
2682 }
2683
2684 void
2685 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2686 {
2687 bzero(dest, sizeof (*dest));
2688
2689 PR_COPY_FIELD(src, dest, pr_flag);
2690 PR_COPY_FIELD(src, dest, pr_nlwp);
2691 PR_COPY_FIELD(src, dest, pr_pid);
2692 PR_COPY_FIELD(src, dest, pr_ppid);
2693 PR_COPY_FIELD(src, dest, pr_pgid);
2694 PR_COPY_FIELD(src, dest, pr_sid);
2695 PR_COPY_FIELD(src, dest, pr_uid);
2696 PR_COPY_FIELD(src, dest, pr_euid);
2697 PR_COPY_FIELD(src, dest, pr_gid);
2698 PR_COPY_FIELD(src, dest, pr_egid);
2699 PR_IGNORE_FIELD(src, dest, pr_addr);
2700 PR_COPY_FIELD_ILP32(src, dest, pr_size);
2701 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2702 PR_COPY_FIELD(src, dest, pr_ttydev);
2703 PR_COPY_FIELD(src, dest, pr_pctcpu);
2704 PR_COPY_FIELD(src, dest, pr_pctmem);
2705 PR_COPY_TIMESPEC(src, dest, pr_start);
2706 PR_COPY_TIMESPEC(src, dest, pr_time);
2707 PR_COPY_TIMESPEC(src, dest, pr_ctime);
2708 PR_COPY_BUF(src, dest, pr_fname);
2709 PR_COPY_BUF(src, dest, pr_psargs);
2710 PR_COPY_FIELD(src, dest, pr_wstat);
2711 PR_COPY_FIELD(src, dest, pr_argc);
2712 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2713 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2714 PR_COPY_FIELD(src, dest, pr_dmodel);
2715 PR_COPY_FIELD(src, dest, pr_taskid);
2716 PR_COPY_FIELD(src, dest, pr_projid);
2717 PR_COPY_FIELD(src, dest, pr_nzomb);
2718 PR_COPY_FIELD(src, dest, pr_poolid);
2719 PR_COPY_FIELD(src, dest, pr_contract);
	PR_COPY_FIELD(src, dest, pr_zoneid);
2722
2723 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2724 }
2725
2726 #undef PR_COPY_FIELD
2727 #undef PR_COPY_FIELD_ILP32
2728 #undef PR_COPY_TIMESPEC
2729 #undef PR_COPY_BUF
2730 #undef PR_IGNORE_FIELD
2731
2732 #endif /* _SYSCALL32_IMPL */
2733
2734 /*
2735 * This used to get called when microstate accounting was disabled but
 * microstate information was requested. Since microstate accounting is on
2737 * regardless of the proc flags, this simply makes it appear to procfs that
2738 * microstate accounting is on. This is relatively meaningless since you
2739 * can't turn it off, but this is here for the sake of appearances.
2740 */
2741
2742 /*ARGSUSED*/
2743 void
2744 estimate_msacct(kthread_t *t, hrtime_t curtime)
2745 {
2746 proc_t *p;
2747
2748 if (t == NULL)
2749 return;
2750
2751 p = ttoproc(t);
2752 ASSERT(MUTEX_HELD(&p->p_lock));
2753
2754 /*
2755 * A system process (p0) could be referenced if the thread is
2756 * in the process of exiting. Don't turn on microstate accounting
2757 * in that case.
2758 */
2759 if (p->p_flag & SSYS)
2760 return;
2761
2762 /*
2763 * Loop through all the LWPs (kernel threads) in the process.
2764 */
2765 t = p->p_tlist;
2766 do {
2767 t->t_proc_flag |= TP_MSACCT;
2768 } while ((t = t->t_forw) != p->p_tlist);
2769
2770 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2771 }
2772
2773 /*
2774 * It's not really possible to disable microstate accounting anymore.
 * However, this routine simply turns off the ms accounting flags in a process.
2776 * This way procfs can still pretend to turn microstate accounting on and
2777 * off for a process, but it actually doesn't do anything. This is
2778 * a neutered form of preemptive idiot-proofing.
2779 */
2780 void
2781 disable_msacct(proc_t *p)
2782 {
2783 kthread_t *t;
2784
2785 ASSERT(MUTEX_HELD(&p->p_lock));
2786
2787 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2788 /*
2789 * Loop through all the LWPs (kernel threads) in the process.
2790 */
2791 if ((t = p->p_tlist) != NULL) {
2792 do {
2793 /* clear per-thread flag */
2794 t->t_proc_flag &= ~TP_MSACCT;
2795 } while ((t = t->t_forw) != p->p_tlist);
2796 }
2797 }
2798
2799 /*
2800 * Return resource usage information.
2801 */
2802 void
2803 prgetusage(kthread_t *t, prhusage_t *pup)
2804 {
2805 klwp_t *lwp = ttolwp(t);
2806 hrtime_t *mstimep;
2807 struct mstate *ms = &lwp->lwp_mstate;
2808 int state;
2809 int i;
2810 hrtime_t curtime;
2811 hrtime_t waitrq;
2812 hrtime_t tmp1;
2813
2814 curtime = gethrtime_unscaled();
2815
2816 pup->pr_lwpid = t->t_tid;
2817 pup->pr_count = 1;
2818 pup->pr_create = ms->ms_start;
2819 pup->pr_term = ms->ms_term;
2820 scalehrtime(&pup->pr_create);
2821 scalehrtime(&pup->pr_term);
	if (ms->ms_term == 0)
		pup->pr_rtime = curtime - ms->ms_start;
	else
		pup->pr_rtime = ms->ms_term - ms->ms_start;
	scalehrtime(&pup->pr_rtime);

2831 pup->pr_utime = ms->ms_acct[LMS_USER];
2832 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
2833 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
2834 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
2835 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
2836 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
2837 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2838 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
2839 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2840 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2841
2842 prscaleusage(pup);
2843
2844 /*
2845 * Adjust for time waiting in the dispatcher queue.
2846 */
2847 waitrq = t->t_waitrq; /* hopefully atomic */
2848 if (waitrq != 0) {
2849 if (waitrq > curtime) {
2850 curtime = gethrtime_unscaled();
2851 }
2852 tmp1 = curtime - waitrq;
2853 scalehrtime(&tmp1);
2854 pup->pr_wtime += tmp1;
2855 curtime = waitrq;
2856 }
2857
2858 /*
2859 * Adjust for time spent in current microstate.
2860 */
2861 if (ms->ms_state_start > curtime) {
2862 curtime = gethrtime_unscaled();
2863 }
2864
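	/*
	 * We may race with the thread updating ms_state_start without any
	 * lock held; if the computed interval comes out negative, reread
	 * the clock and retry, giving up after MAX_ITERS_SPIN attempts.
	 */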
2865 i = 0;
2866 do {
2867 switch (state = t->t_mstate) {
2868 case LMS_SLEEP:
2869 /*
2870 * Update the timer for the current sleep state.
2871 */
2872 switch (state = ms->ms_prev) {
2873 case LMS_TFAULT:
2874 case LMS_DFAULT:
2875 case LMS_KFAULT:
2876 case LMS_USER_LOCK:
2877 break;
2878 default:
2879 state = LMS_SLEEP;
2880 break;
2881 }
2882 break;
2883 case LMS_TFAULT:
2884 case LMS_DFAULT:
2885 case LMS_KFAULT:
2886 case LMS_USER_LOCK:
2887 state = LMS_SYSTEM;
2888 break;
2889 }
2890 switch (state) {
2891 case LMS_USER: mstimep = &pup->pr_utime; break;
2892 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
2893 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
2894 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
2895 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
2896 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
2897 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
2898 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
2899 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
2900 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
2901 default: panic("prgetusage: unknown microstate");
2902 }
2903 tmp1 = curtime - ms->ms_state_start;
2904 if (tmp1 < 0) {
2905 curtime = gethrtime_unscaled();
2906 i++;
2907 continue;
2908 }
2909 scalehrtime(&tmp1);
2910 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2911
2912 *mstimep += tmp1;
2913
2914 /* update pup timestamp */
2915 pup->pr_tstamp = curtime;
2916 scalehrtime(&pup->pr_tstamp);
2917
2918 /*
2919 * Resource usage counters.
2920 */
2921 pup->pr_minf = lwp->lwp_ru.minflt;
2922 pup->pr_majf = lwp->lwp_ru.majflt;
2923 pup->pr_nswap = lwp->lwp_ru.nswap;
2924 pup->pr_inblk = lwp->lwp_ru.inblock;
2925 pup->pr_oublk = lwp->lwp_ru.oublock;
2926 pup->pr_msnd = lwp->lwp_ru.msgsnd;
2927 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
2928 pup->pr_sigs = lwp->lwp_ru.nsignals;
2929 pup->pr_vctx = lwp->lwp_ru.nvcsw;
2930 pup->pr_ictx = lwp->lwp_ru.nivcsw;
2931 pup->pr_sysc = lwp->lwp_ru.sysc;
2932 pup->pr_ioch = lwp->lwp_ru.ioch;
2933 }
2934
2935 /*
2936 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2937 */
2938 void
2939 prscaleusage(prhusage_t *usg)
2940 {
2941 scalehrtime(&usg->pr_utime);
2942 scalehrtime(&usg->pr_stime);
2943 scalehrtime(&usg->pr_ttime);
2944 scalehrtime(&usg->pr_tftime);
2945 scalehrtime(&usg->pr_dftime);
2946 scalehrtime(&usg->pr_kftime);
2947 scalehrtime(&usg->pr_ltime);
2948 scalehrtime(&usg->pr_slptime);
2949 scalehrtime(&usg->pr_wtime);
2950 scalehrtime(&usg->pr_stoptime);
2951 }
2952
2953
2954 /*
2955 * Sum resource usage information.
2956 */
2957 void
2958 praddusage(kthread_t *t, prhusage_t *pup)
2959 {
2960 klwp_t *lwp = ttolwp(t);
2961 hrtime_t *mstimep;
2962 struct mstate *ms = &lwp->lwp_mstate;
2963 int state;
2964 int i;
2965 hrtime_t curtime;
2966 hrtime_t waitrq;
2967 hrtime_t tmp;
2968 prhusage_t conv;
2969
2970 curtime = gethrtime_unscaled();
2971
	if (ms->ms_term == 0)
		tmp = curtime - ms->ms_start;
	else
		tmp = ms->ms_term - ms->ms_start;
	scalehrtime(&tmp);
	pup->pr_rtime += tmp;
2981
2982 conv.pr_utime = ms->ms_acct[LMS_USER];
2983 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2984 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2985 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2986 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2987 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2988 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2989 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2990 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2991 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2992
2993 prscaleusage(&conv);
2994
2995 pup->pr_utime += conv.pr_utime;
2996 pup->pr_stime += conv.pr_stime;
2997 pup->pr_ttime += conv.pr_ttime;
2998 pup->pr_tftime += conv.pr_tftime;
2999 pup->pr_dftime += conv.pr_dftime;
3000 pup->pr_kftime += conv.pr_kftime;
3001 pup->pr_ltime += conv.pr_ltime;
3002 pup->pr_slptime += conv.pr_slptime;
3003 pup->pr_wtime += conv.pr_wtime;
3004 pup->pr_stoptime += conv.pr_stoptime;
3005
3006 /*
3007 * Adjust for time waiting in the dispatcher queue.
3008 */
3009 waitrq = t->t_waitrq; /* hopefully atomic */
3010 if (waitrq != 0) {
3011 if (waitrq > curtime) {
3012 curtime = gethrtime_unscaled();
3013 }
3014 tmp = curtime - waitrq;
3015 scalehrtime(&tmp);
3016 pup->pr_wtime += tmp;
3017 curtime = waitrq;
3018 }
3019
3020 /*
3021 * Adjust for time spent in current microstate.
3022 */
3023 if (ms->ms_state_start > curtime) {
3024 curtime = gethrtime_unscaled();
3025 }
3026
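	/*
	 * As in prgetusage(), retry if a racing update to ms_state_start
	 * yields a negative interval, up to MAX_ITERS_SPIN times.
	 */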
3027 i = 0;
3028 do {
3029 switch (state = t->t_mstate) {
3030 case LMS_SLEEP:
3031 /*
3032 * Update the timer for the current sleep state.
3033 */
3034 switch (state = ms->ms_prev) {
3035 case LMS_TFAULT:
3036 case LMS_DFAULT:
3037 case LMS_KFAULT:
3038 case LMS_USER_LOCK:
3039 break;
3040 default:
3041 state = LMS_SLEEP;
3042 break;
3043 }
3044 break;
3045 case LMS_TFAULT:
3046 case LMS_DFAULT:
3047 case LMS_KFAULT:
3048 case LMS_USER_LOCK:
3049 state = LMS_SYSTEM;
3050 break;
3051 }
3052 switch (state) {
3053 case LMS_USER: mstimep = &pup->pr_utime; break;
3054 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3055 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3056 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3057 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3058 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3059 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3060 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3061 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3062 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3063 default: panic("praddusage: unknown microstate");
3064 }
3065 tmp = curtime - ms->ms_state_start;
3066 if (tmp < 0) {
3067 curtime = gethrtime_unscaled();
3068 i++;
3069 continue;
3070 }
3071 scalehrtime(&tmp);
3072 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3073
3074 *mstimep += tmp;
3075
3076 /* update pup timestamp */
3077 pup->pr_tstamp = curtime;
3078 scalehrtime(&pup->pr_tstamp);
3079
3080 /*
3081 * Resource usage counters.
3082 */
3083 pup->pr_minf += lwp->lwp_ru.minflt;
3084 pup->pr_majf += lwp->lwp_ru.majflt;
3085 pup->pr_nswap += lwp->lwp_ru.nswap;
3086 pup->pr_inblk += lwp->lwp_ru.inblock;
3087 pup->pr_oublk += lwp->lwp_ru.oublock;
3088 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3089 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3090 pup->pr_sigs += lwp->lwp_ru.nsignals;
3091 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3092 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3093 pup->pr_sysc += lwp->lwp_ru.sysc;
3094 pup->pr_ioch += lwp->lwp_ru.ioch;
3095 }
3096
3097 /*
3098 * Convert a prhusage_t to a prusage_t.
3099 * This means convert each hrtime_t to a timestruc_t
3100 * and copy the count fields uint64_t => ulong_t.
3101 */
3102 void
3103 prcvtusage(prhusage_t *pup, prusage_t *upup)
3104 {
3105 uint64_t *ullp;
3106 ulong_t *ulp;
3107 int i;
3108
3109 upup->pr_lwpid = pup->pr_lwpid;
3110 upup->pr_count = pup->pr_count;
3111
3112 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3113 hrt2ts(pup->pr_create, &upup->pr_create);
3114 hrt2ts(pup->pr_term, &upup->pr_term);
3115 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3116 hrt2ts(pup->pr_utime, &upup->pr_utime);
3117 hrt2ts(pup->pr_stime, &upup->pr_stime);
3118 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3119 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3120 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3121 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3122 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3123 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3124 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3125 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3126 bzero(upup->filltime, sizeof (upup->filltime));
3127
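	/*
	 * The 22 words copied here are the counters pr_minf through
	 * pr_ioch plus the trailing filler words (assuming the counters
	 * are laid out contiguously, as they are in prhusage_t and
	 * prusage_t).
	 */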
3128 ullp = &pup->pr_minf;
3129 ulp = &upup->pr_minf;
3130 for (i = 0; i < 22; i++)
3131 *ulp++ = (ulong_t)*ullp++;
3132 }
3133
3134 #ifdef _SYSCALL32_IMPL
3135 void
3136 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3137 {
3138 uint64_t *ullp;
3139 uint32_t *ulp;
3140 int i;
3141
3142 upup->pr_lwpid = pup->pr_lwpid;
3143 upup->pr_count = pup->pr_count;
3144
3145 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
3146 hrt2ts32(pup->pr_create, &upup->pr_create);
3147 hrt2ts32(pup->pr_term, &upup->pr_term);
3148 hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
3149 hrt2ts32(pup->pr_utime, &upup->pr_utime);
3150 hrt2ts32(pup->pr_stime, &upup->pr_stime);
3151 hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
3152 hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
3153 hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
3154 hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
3155 hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
3156 hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
3157 hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
3158 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
3159 bzero(upup->filltime, sizeof (upup->filltime));
3160
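	/* Same 22-word counter copy as prcvtusage(), narrowed to 32 bits. */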
3161 ullp = &pup->pr_minf;
3162 ulp = &upup->pr_minf;
3163 for (i = 0; i < 22; i++)
3164 *ulp++ = (uint32_t)*ullp++;
3165 }
3166 #endif /* _SYSCALL32_IMPL */
3167
3168 /*
3169 * Determine whether a set is empty.
3170 */
3171 int
3172 setisempty(uint32_t *sp, uint_t n)
3173 {
3174 while (n--)
3175 if (*sp++)
3176 return (0);
3177 return (1);
3178 }
3179
3180 /*
3181 * Utility routine for establishing a watched area in the process.
3182 * Keep the list of watched areas sorted by virtual address.
3183 */
3184 int
3185 set_watched_area(proc_t *p, struct watched_area *pwa)
3186 {
3187 caddr_t vaddr = pwa->wa_vaddr;
3188 caddr_t eaddr = pwa->wa_eaddr;
3189 ulong_t flags = pwa->wa_flags;
3190 struct watched_area *target;
3191 avl_index_t where;
3192 int error = 0;
3193
3194 /* we must not be holding p->p_lock, but the process must be locked */
3195 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3196 ASSERT(p->p_proc_flag & P_PR_LOCK);
3197
3198 /*
3199 * If this is our first watchpoint, enable watchpoints for the process.
3200 */
3201 if (!pr_watch_active(p)) {
3202 kthread_t *t;
3203
3204 mutex_enter(&p->p_lock);
3205 if ((t = p->p_tlist) != NULL) {
3206 do {
3207 watch_enable(t);
3208 } while ((t = t->t_forw) != p->p_tlist);
3209 }
3210 mutex_exit(&p->p_lock);
3211 }
3212
3213 target = pr_find_watched_area(p, pwa, &where);
3214 if (target != NULL) {
3215 /*
3216 * We discovered an existing, overlapping watched area.
3217 * Allow it only if it is an exact match.
3218 */
3219 if (target->wa_vaddr != vaddr ||
3220 target->wa_eaddr != eaddr)
3221 error = EINVAL;
3222 else if (target->wa_flags != flags) {
3223 error = set_watched_page(p, vaddr, eaddr,
3224 flags, target->wa_flags);
3225 target->wa_flags = flags;
3226 }
3227 kmem_free(pwa, sizeof (struct watched_area));
3228 } else {
3229 avl_insert(&p->p_warea, pwa, where);
3230 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3231 }
3232
3233 return (error);
3234 }
3235
3236 /*
3237 * Utility routine for clearing a watched area in the process.
3238 * Must be an exact match of the virtual address.
3239 * size and flags don't matter.
3240 */
3241 int
3242 clear_watched_area(proc_t *p, struct watched_area *pwa)
3243 {
3244 struct watched_area *found;
3245
3246 /* we must not be holding p->p_lock, but the process must be locked */
3247 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

3251 if (!pr_watch_active(p)) {
3252 kmem_free(pwa, sizeof (struct watched_area));
3253 return (0);
3254 }
3255
3256 /*
3257 * Look for a matching address in the watched areas. If a match is
3258 * found, clear the old watched area and adjust the watched page(s). It
3259 * is not an error if there is no match.
3260 */
3261 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3262 found->wa_vaddr == pwa->wa_vaddr) {
3263 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3264 found->wa_flags);
3265 avl_remove(&p->p_warea, found);
3266 kmem_free(found, sizeof (struct watched_area));
3267 }
3268
3269 kmem_free(pwa, sizeof (struct watched_area));
3270
3271 /*
3272 * If we removed the last watched area from the process, disable
3273 * watchpoints.
3274 */
3275 if (!pr_watch_active(p)) {
3276 kthread_t *t;
3277
3278 mutex_enter(&p->p_lock);
3279 if ((t = p->p_tlist) != NULL) {
3280 do {
3281 watch_disable(t);
3282 } while ((t = t->t_forw) != p->p_tlist);
3283 }
3284 mutex_exit(&p->p_lock);
3285 }
3286
3287 return (0);
3288 }
3289
3290 /*
3291 * Frees all the watched_area structures
3292 */
3293 void
3294 pr_free_watchpoints(proc_t *p)
3295 {
3296 struct watched_area *delp;
3297 void *cookie;
3298
3299 cookie = NULL;
3300 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3301 kmem_free(delp, sizeof (struct watched_area));
3302
3303 avl_destroy(&p->p_warea);
3304 }
3305
3306 /*
3307 * This one is called by the traced process to unwatch all the
3308 * pages while deallocating the list of watched_page structs.
3309 */
3310 void
3311 pr_free_watched_pages(proc_t *p)
3312 {
3313 struct as *as = p->p_as;
3314 struct watched_page *pwp;
3315 uint_t prot;
3316 int retrycnt, err;
3317 void *cookie;
3318
3319 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3320 return;
3321
3322 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3323 AS_LOCK_ENTER(as, RW_WRITER);
3324
3327 cookie = NULL;
3328 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3329 retrycnt = 0;
3330 if ((prot = pwp->wp_oprot) != 0) {
3331 caddr_t addr = pwp->wp_vaddr;
3332 struct seg *seg;
3333 retry:
3334
3335 if ((pwp->wp_prot != prot ||
3336 (pwp->wp_flags & WP_NOWATCH)) &&
3337 (seg = as_segat(as, addr)) != NULL) {
3338 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3339 if (err == IE_RETRY) {
3340 ASSERT(retrycnt == 0);
3341 retrycnt++;
3342 goto retry;
3343 }
3344 }
3345 }
3346 kmem_free(pwp, sizeof (struct watched_page));
3347 }
3348
3349 avl_destroy(&as->a_wpage);
3350 p->p_wprot = NULL;
3351
3352 AS_LOCK_EXIT(as);
3353 }
3354
3355 /*
3356 * Insert a watched area into the list of watched pages.
3357 * If oflags is zero then we are adding a new watched area.
3358 * Otherwise we are changing the flags of an existing watched area.
3359 */
3360 static int
3361 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3362 ulong_t flags, ulong_t oflags)
3363 {
3364 struct as *as = p->p_as;
3365 avl_tree_t *pwp_tree;
3366 struct watched_page *pwp, *newpwp;
3367 struct watched_page tpw;
3368 avl_index_t where;
3369 struct seg *seg;
3370 uint_t prot;
3371 caddr_t addr;
3372
3373 /*
3374 * We need to pre-allocate a list of structures before we grab the
3375 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3376 * held.
3377 */
3378 newpwp = NULL;
3379 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3380 addr < eaddr; addr += PAGESIZE) {
3381 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3382 pwp->wp_list = newpwp;
3383 newpwp = pwp;
3384 }
3385
3386 AS_LOCK_ENTER(as, RW_WRITER);
3387
3388 /*
3389 * Search for an existing watched page to contain the watched area.
3390 * If none is found, grab a new one from the available list
3391 * and insert it in the active list, keeping the list sorted
3392 * by user-level virtual address.
3393 */
3394 if (p->p_flag & SVFWAIT)
3395 pwp_tree = &p->p_wpage;
3396 else
3397 pwp_tree = &as->a_wpage;
3398
3399 again:
3400 if (avl_numnodes(pwp_tree) > prnwatch) {
3401 AS_LOCK_EXIT(as);
3402 while (newpwp != NULL) {
3403 pwp = newpwp->wp_list;
3404 kmem_free(newpwp, sizeof (struct watched_page));
3405 newpwp = pwp;
3406 }
3407 return (E2BIG);
3408 }
3409
3410 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3411 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3412 pwp = newpwp;
3413 newpwp = newpwp->wp_list;
3414 pwp->wp_list = NULL;
3415 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3416 (uintptr_t)PAGEMASK);
3417 avl_insert(pwp_tree, pwp, where);
3418 }
3419
3420 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3421
3422 if (oflags & WA_READ)
3423 pwp->wp_read--;
3424 if (oflags & WA_WRITE)
3425 pwp->wp_write--;
3426 if (oflags & WA_EXEC)
3427 pwp->wp_exec--;
3428
3429 ASSERT(pwp->wp_read >= 0);
3430 ASSERT(pwp->wp_write >= 0);
3431 ASSERT(pwp->wp_exec >= 0);
3432
3433 if (flags & WA_READ)
3434 pwp->wp_read++;
3435 if (flags & WA_WRITE)
3436 pwp->wp_write++;
3437 if (flags & WA_EXEC)
3438 pwp->wp_exec++;
3439
3440 if (!(p->p_flag & SVFWAIT)) {
3441 vaddr = pwp->wp_vaddr;
3442 if (pwp->wp_oprot == 0 &&
3443 (seg = as_segat(as, vaddr)) != NULL) {
3444 SEGOP_GETPROT(seg, vaddr, 0, &prot);
3445 pwp->wp_oprot = (uchar_t)prot;
3446 pwp->wp_prot = (uchar_t)prot;
3447 }
3448 if (pwp->wp_oprot != 0) {
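			/*
			 * Compute the effective page protections: read and
			 * exec watchpoints must strip all access so that any
			 * reference faults, while a write watchpoint only
			 * needs PROT_WRITE removed.
			 */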
3449 prot = pwp->wp_oprot;
3450 if (pwp->wp_read)
3451 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3452 if (pwp->wp_write)
3453 prot &= ~PROT_WRITE;
3454 if (pwp->wp_exec)
3455 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3456 if (!(pwp->wp_flags & WP_NOWATCH) &&
3457 pwp->wp_prot != prot &&
3458 (pwp->wp_flags & WP_SETPROT) == 0) {
3459 pwp->wp_flags |= WP_SETPROT;
3460 pwp->wp_list = p->p_wprot;
3461 p->p_wprot = pwp;
3462 }
3463 pwp->wp_prot = (uchar_t)prot;
3464 }
3465 }
3466
3467 /*
3468 * If the watched area extends into the next page then do
3469 * it over again with the virtual address of the next page.
3470 */
3471 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3472 goto again;
3473
3474 AS_LOCK_EXIT(as);
3475
3476 /*
3477 * Free any pages we may have over-allocated
3478 */
3479 while (newpwp != NULL) {
3480 pwp = newpwp->wp_list;
3481 kmem_free(newpwp, sizeof (struct watched_page));
3482 newpwp = pwp;
3483 }
3484
3485 return (0);
3486 }
3487
3488 /*
3489 * Remove a watched area from the list of watched pages.
3490 * A watched area may extend over more than one page.
3491 */
3492 static void
3493 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3494 {
3495 struct as *as = p->p_as;
3496 struct watched_page *pwp;
3497 struct watched_page tpw;
3498 avl_tree_t *tree;
3499 avl_index_t where;
3500
3501 AS_LOCK_ENTER(as, RW_WRITER);
3502
3503 if (p->p_flag & SVFWAIT)
3504 tree = &p->p_wpage;
3505 else
3506 tree = &as->a_wpage;
3507
3508 tpw.wp_vaddr = vaddr =
3509 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3510 pwp = avl_find(tree, &tpw, &where);
3511 if (pwp == NULL)
3512 pwp = avl_nearest(tree, where, AVL_AFTER);
3513
3514 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3515 ASSERT(vaddr <= pwp->wp_vaddr);
3516
3517 if (flags & WA_READ)
3518 pwp->wp_read--;
3519 if (flags & WA_WRITE)
3520 pwp->wp_write--;
3521 if (flags & WA_EXEC)
3522 pwp->wp_exec--;
3523
3524 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3525 /*
3526 * Reset the hat layer's protections on this page.
3527 */
3528 if (pwp->wp_oprot != 0) {
3529 uint_t prot = pwp->wp_oprot;
3530
3531 if (pwp->wp_read)
3532 prot &=
3533 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3534 if (pwp->wp_write)
3535 prot &= ~PROT_WRITE;
3536 if (pwp->wp_exec)
3537 prot &=
3538 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3539 if (!(pwp->wp_flags & WP_NOWATCH) &&
3540 pwp->wp_prot != prot &&
3541 (pwp->wp_flags & WP_SETPROT) == 0) {
3542 pwp->wp_flags |= WP_SETPROT;
3543 pwp->wp_list = p->p_wprot;
3544 p->p_wprot = pwp;
3545 }
3546 pwp->wp_prot = (uchar_t)prot;
3547 }
3548 } else {
3549 /*
3550 * No watched areas remain in this page.
3551 * Reset everything to normal.
3552 */
3553 if (pwp->wp_oprot != 0) {
3554 pwp->wp_prot = pwp->wp_oprot;
3555 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3556 pwp->wp_flags |= WP_SETPROT;
3557 pwp->wp_list = p->p_wprot;
3558 p->p_wprot = pwp;
3559 }
3560 }
3561 }
3562
3563 pwp = AVL_NEXT(tree, pwp);
3564 }
3565
3566 AS_LOCK_EXIT(as);
3567 }
3568
3569 /*
3570 * Return the original protections for the specified page.
3571 */
3572 static void
3573 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3574 {
3575 struct watched_page *pwp;
3576 struct watched_page tpw;
3577
3578 ASSERT(AS_LOCK_HELD(as));
3579
3580 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3581 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3582 *prot = pwp->wp_oprot;
3583 }
3584
3585 static prpagev_t *
3586 pr_pagev_create(struct seg *seg, int check_noreserve)
3587 {
3588 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3589 size_t total_pages = seg_pages(seg);
3590
3591 /*
3592 * Limit the size of our vectors to pagev_lim pages at a time. We need
 * 4 or 5 bytes of storage per page, so this means we limit ourselves
3594 * to about a megabyte of kernel heap by default.
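 * (For example, the default pagev_lim of 256K pages works out to 1MB of
 * protection vector plus 256K of incore vector when both are allocated.)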
3595 */
3596 pagev->pg_npages = MIN(total_pages, pagev_lim);
3597 pagev->pg_pnbase = 0;
3598
3599 pagev->pg_protv =
3600 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3601
3602 if (check_noreserve)
3603 pagev->pg_incore =
3604 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3605 else
3606 pagev->pg_incore = NULL;
3607
3608 return (pagev);
3609 }
3610
3611 static void
3612 pr_pagev_destroy(prpagev_t *pagev)
3613 {
3614 if (pagev->pg_incore != NULL)
3615 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3616
3617 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3618 kmem_free(pagev, sizeof (prpagev_t));
3619 }
3620
3621 static caddr_t
3622 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3623 {
3624 ulong_t lastpg = seg_page(seg, eaddr - 1);
3625 ulong_t pn, pnlim;
3626 caddr_t saddr;
3627 size_t len;
3628
3629 ASSERT(addr >= seg->s_base && addr <= eaddr);
3630
3631 if (addr == eaddr)
3632 return (eaddr);
3633
3634 refill:
3635 ASSERT(addr < eaddr);
3636 pagev->pg_pnbase = seg_page(seg, addr);
3637 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3638 saddr = addr;
3639
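	/*
	 * If the remainder of the range fits within this vector, examine
	 * through eaddr; otherwise fill a full vector's worth of pages.
	 */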
3640 if (lastpg < pnlim)
3641 len = (size_t)(eaddr - addr);
3642 else
3643 len = pagev->pg_npages * PAGESIZE;
3644
3645 if (pagev->pg_incore != NULL) {
3646 /*
3647 * INCORE cleverly has different semantics than GETPROT:
3648 * it returns info on pages up to but NOT including addr + len.
3649 */
3650 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3651 pn = pagev->pg_pnbase;
3652
3653 do {
3654 /*
3655 * Guilty knowledge here: We know that segvn_incore
3656 * returns more than just the low-order bit that
3657 * indicates the page is actually in memory. If any
3658 * bits are set, then the page has backing store.
3659 */
3660 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3661 goto out;
3662
3663 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3664
3665 /*
3666 * If we examined all the pages in the vector but we're not
3667 * at the end of the segment, take another lap.
3668 */
3669 if (addr < eaddr)
3670 goto refill;
3671 }
3672
3673 /*
3674 * Need to take len - 1 because addr + len is the address of the
3675 * first byte of the page just past the end of what we want.
3676 */
3677 out:
3678 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3679 return (addr);
3680 }
3681
3682 static caddr_t
3683 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3684 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3685 {
3686 /*
3687 * Our starting address is either the specified address, or the base
3688 * address from the start of the pagev. If the latter is greater,
3689 * this means a previous call to pr_pagev_fill has already scanned
3690 * further than the end of the previous mapping.
3691 */
3692 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3693 caddr_t addr = MAX(*saddrp, base);
3694 ulong_t pn = seg_page(seg, addr);
3695 uint_t prot, nprot;
3696
3697 /*
3698 * If we're dealing with noreserve pages, then advance addr to
3699 * the address of the next page which has backing store.
3700 */
3701 if (pagev->pg_incore != NULL) {
3702 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3703 if ((addr += PAGESIZE) == eaddr) {
3704 *saddrp = addr;
3705 prot = 0;
3706 goto out;
3707 }
3708 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3709 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3710 if (addr == eaddr) {
3711 *saddrp = addr;
3712 prot = 0;
3713 goto out;
3714 }
3715 pn = seg_page(seg, addr);
3716 }
3717 }
3718 }
3719
3720 /*
3721 * Get the protections on the page corresponding to addr.
3722 */
3723 pn = seg_page(seg, addr);
3724 ASSERT(pn >= pagev->pg_pnbase);
3725 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3726
3727 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3728 getwatchprot(seg->s_as, addr, &prot);
3729 *saddrp = addr;
3730
3731 /*
3732 * Now loop until we find a backed page with different protections
3733 * or we reach the end of this segment.
3734 */
3735 while ((addr += PAGESIZE) < eaddr) {
3736 /*
3737 * If pn has advanced to the page number following what we
3738 * have information on, refill the page vector and reset
3739 * addr and pn. If pr_pagev_fill does not return the
3740 * address of the next page, we have a discontiguity and
3741 * thus have reached the end of the current mapping.
3742 */
3743 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3744 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3745 if (naddr != addr)
3746 goto out;
3747 pn = seg_page(seg, addr);
3748 }
3749
3750 /*
3751 * The previous page's protections are in prot, and it has
3752 * backing. If this page is MAP_NORESERVE and has no backing,
3753 * then end this mapping and return the previous protections.
3754 */
3755 if (pagev->pg_incore != NULL &&
3756 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3757 break;
3758
3759 /*
3760 * Otherwise end the mapping if this page's protections (nprot)
3761 * are different than those in the previous page (prot).
3762 */
3763 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3764 getwatchprot(seg->s_as, addr, &nprot);
3765
3766 if (nprot != prot)
3767 break;
3768 }
3769
3770 out:
3771 *protp = prot;
3772 return (addr);
3773 }
3774
3775 size_t
3776 pr_getsegsize(struct seg *seg, int reserved)
3777 {
3778 size_t size = seg->s_size;
3779
3780 /*
3781 * If we're interested in the reserved space, return the size of the
3782 * segment itself. Everything else in this function is a special case
3783 * to determine the actual underlying size of various segment types.
3784 */
3785 if (reserved)
3786 return (size);
3787
3788 /*
3789 * If this is a segvn mapping of a regular file, return the smaller
3790 * of the segment size and the remaining size of the file beyond
3791 * the file offset corresponding to seg->s_base.
3792 */
3793 if (seg->s_ops == &segvn_ops) {
3794 vattr_t vattr;
3795 vnode_t *vp;
3796
3797 vattr.va_mask = AT_SIZE;
3798
3799 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3800 vp != NULL && vp->v_type == VREG &&
3801 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3802
3803 u_offset_t fsize = vattr.va_size;
3804 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3805
3806 if (fsize < offset)
3807 fsize = 0;
3808 else
3809 fsize -= offset;
3810
3811 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3812
3813 if (fsize < (u_offset_t)size)
3814 size = (size_t)fsize;
3815 }
3816
3817 return (size);
3818 }
3819
3820 /*
3821 * If this is an ISM shared segment, don't include pages that are
3822 * beyond the real size of the spt segment that backs it.
3823 */
3824 if (seg->s_ops == &segspt_shmops)
3825 return (MIN(spt_realsize(seg), size));
3826
3827 /*
 * If this segment is a mapping from /dev/null, then this is a
3829 * reservation of virtual address space and has no actual size.
3830 * Such segments are backed by segdev and have type set to neither
3831 * MAP_SHARED nor MAP_PRIVATE.
3832 */
3833 if (seg->s_ops == &segdev_ops &&
3834 ((SEGOP_GETTYPE(seg, seg->s_base) &
3835 (MAP_SHARED | MAP_PRIVATE)) == 0))
3836 return (0);
3837
3838 /*
3839 * If this segment doesn't match one of the special types we handle,
3840 * just return the size of the segment itself.
3841 */
3842 return (size);
3843 }
3844
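/*
 * Return the protections for the address range beginning at *saddrp
 * within seg, setting *naddrp to the end of the run of pages sharing
 * those protections.  *saddrp may be advanced past MAP_NORESERVE pages
 * that have no backing.  Per-page state is cached across calls in *tmp;
 * it is freed automatically once naddr reaches eaddr, or explicitly via
 * pr_getprot_done().  The typical caller pattern (cf. prpdread) is:
 *
 *	for (saddr = seg->s_base; saddr < eaddr; saddr = naddr)
 *		prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
 */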
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort. We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}

void
pr_getprot_done(void **tmp)
{
	if (*tmp != NULL) {
		pr_pagev_destroy((prpagev_t *)*tmp);
		*tmp = NULL;
	}
}

/*
 * Return true iff the vnode is a /proc file from the object directory.
 */
int
pr_isobject(vnode_t *vp)
{
	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
}

/*
 * Return true iff the vnode is a /proc file opened by the process itself.
 */
int
pr_isself(vnode_t *vp)
{
	/*
	 * XXX: To retain binary compatibility with the old
	 * ioctl()-based version of /proc, we exempt self-opens
	 * of /proc/<pid> from being marked close-on-exec.
	 */
	return (vn_matchops(vp, prvnodeops) &&
	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
	    VTOP(vp)->pr_type != PR_PIDDIR);
}

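/*
 * Return the HAT page size in use at saddr, or -1 if no translation is
 * currently loaded in the HAT.  On return, *naddrp is the end of the
 * range, capped at eaddr, over which that same page size applies.
 */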
static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	if (pagesize == -1)
		pagesize = PAGESIZE;

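	/*
	 * Step to the next pagesize-aligned boundary so the loop below
	 * always samples the HAT at aligned addresses; P2NPHASE() yields
	 * the distance from saddr to that boundary (zero if aligned).
	 */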
	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}

/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

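	/*
	 * Identify the heap and stack segments up front so the loop
	 * below can flag them with MA_BREAK and MA_STACK respectively.
	 */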
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

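				/*
				 * pr_pagesize is the base page size;
				 * pr_hatpagesize is the size the HAT is
				 * actually using for this range, or 0 if
				 * no translation is currently loaded
				 * (pr_getpagesize() returned -1).
				 */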
				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
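					/*
					 * SHMID_FREE indicates the id was
					 * removed while the segment was
					 * still attached; report it as -1.
					 */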
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

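				/*
				 * Tally resident, anonymous and locked
				 * pages from the per-page incore vector.
				 */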
				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

/*
 * Return the process's credentials. We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}

/*
 * Compute the actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges. We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}

#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with extended memory map information,
 * in the 32-bit (prxmap32_t) format for ILP32 consumers.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

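				/*
				 * Addresses and sizes are narrowed to the
				 * 32-bit fields of prxmap32_t for ILP32
				 * consumers.
				 */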
				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
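					/*
					 * cmpldev() compresses the 64-bit
					 * dev_t into the 32-bit dev32_t
					 * format.
					 */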
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

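				/*
				 * Tally page residency exactly as in
				 * prgetxmap() above.
				 */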
				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */