Print this page
OS-5373 lx_proc panic while reading /proc/<pid>/exe link
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/proc/prsubr.c
+++ new/usr/src/uts/common/fs/proc/prsubr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright (c) 2013, Joyent, Inc. All rights reserved.
24 + * Copyright 2016, Joyent, Inc.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/t_lock.h>
32 32 #include <sys/param.h>
33 33 #include <sys/cmn_err.h>
34 34 #include <sys/cred.h>
35 35 #include <sys/priv.h>
36 36 #include <sys/debug.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/inline.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/mman.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/brand.h>
43 43 #include <sys/sobject.h>
44 44 #include <sys/sysmacros.h>
45 45 #include <sys/systm.h>
46 46 #include <sys/uio.h>
47 47 #include <sys/var.h>
48 48 #include <sys/vfs.h>
49 49 #include <sys/vnode.h>
50 50 #include <sys/session.h>
51 51 #include <sys/pcb.h>
52 52 #include <sys/signal.h>
53 53 #include <sys/user.h>
54 54 #include <sys/disp.h>
55 55 #include <sys/class.h>
56 56 #include <sys/ts.h>
57 57 #include <sys/bitmap.h>
58 58 #include <sys/poll.h>
59 59 #include <sys/shm_impl.h>
60 60 #include <sys/fault.h>
61 61 #include <sys/syscall.h>
62 62 #include <sys/procfs.h>
63 63 #include <sys/processor.h>
64 64 #include <sys/cpuvar.h>
65 65 #include <sys/copyops.h>
66 66 #include <sys/time.h>
67 67 #include <sys/msacct.h>
68 68 #include <vm/as.h>
69 69 #include <vm/rm.h>
70 70 #include <vm/seg.h>
71 71 #include <vm/seg_vn.h>
72 72 #include <vm/seg_dev.h>
73 73 #include <vm/seg_spt.h>
74 74 #include <vm/page.h>
75 75 #include <sys/vmparam.h>
76 76 #include <sys/swap.h>
77 77 #include <fs/proc/prdata.h>
78 78 #include <sys/task.h>
79 79 #include <sys/project.h>
80 80 #include <sys/contract_impl.h>
81 81 #include <sys/contract/process.h>
82 82 #include <sys/contract/process_impl.h>
83 83 #include <sys/schedctl.h>
84 84 #include <sys/pool.h>
85 85 #include <sys/zone.h>
86 86 #include <sys/atomic.h>
87 87 #include <sys/sdt.h>
88 88
89 89 #define MAX_ITERS_SPIN 5
90 90
91 91 typedef struct prpagev {
92 92 uint_t *pg_protv; /* vector of page permissions */
93 93 char *pg_incore; /* vector of incore flags */
94 94 size_t pg_npages; /* number of pages in protv and incore */
95 95 ulong_t pg_pnbase; /* pn within segment of first protv element */
96 96 } prpagev_t;
97 97
98 98 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
99 99
100 100 extern struct seg_ops segdev_ops; /* needs a header file */
101 101 extern struct seg_ops segspt_shmops; /* needs a header file */
102 102
103 103 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 104 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105 105
106 106 /*
107 107 * Choose an lwp from the complete set of lwps for the process.
108 108 * This is called for any operation applied to the process
109 109 * file descriptor that requires an lwp to operate upon.
110 110 *
111 111 * Returns a pointer to the thread for the selected LWP,
112 112 * and with the dispatcher lock held for the thread.
113 113 *
114 114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 115 * don't touch this code unless you know all of the implications.
116 116 */
117 117 kthread_t *
118 118 prchoose(proc_t *p)
119 119 {
120 120 kthread_t *t;
121 121 kthread_t *t_onproc = NULL; /* running on processor */
122 122 kthread_t *t_run = NULL; /* runnable, on disp queue */
123 123 kthread_t *t_sleep = NULL; /* sleeping */
124 124 kthread_t *t_hold = NULL; /* sleeping, performing hold */
125 125 kthread_t *t_susp = NULL; /* suspended stop */
126 126 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */
127 127 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */
128 128 kthread_t *t_req = NULL; /* requested stop */
129 129 kthread_t *t_istop = NULL; /* event-of-interest stop */
130 130 kthread_t *t_dtrace = NULL; /* DTrace stop */
131 131
132 132 ASSERT(MUTEX_HELD(&p->p_lock));
133 133
134 134 /*
135 135 * If the agent lwp exists, it takes precedence over all others.
136 136 */
137 137 if ((t = p->p_agenttp) != NULL) {
138 138 thread_lock(t);
139 139 return (t);
140 140 }
141 141
142 142 if ((t = p->p_tlist) == NULL) /* start at the head of the list */
143 143 return (t);
144 144 	do {		/* for each lwp in the process */
145 145 if (VSTOPPED(t)) { /* virtually stopped */
146 146 if (t_req == NULL)
147 147 t_req = t;
148 148 continue;
149 149 }
150 150
151 151 thread_lock(t); /* make sure thread is in good state */
152 152 switch (t->t_state) {
153 153 default:
154 154 panic("prchoose: bad thread state %d, thread 0x%p",
155 155 t->t_state, (void *)t);
156 156 /*NOTREACHED*/
157 157 case TS_SLEEP:
158 158 /* this is filthy */
159 159 if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 160 t->t_wchan0 == NULL) {
161 161 if (t_hold == NULL)
162 162 t_hold = t;
163 163 } else {
164 164 if (t_sleep == NULL)
165 165 t_sleep = t;
166 166 }
167 167 break;
168 168 case TS_RUN:
169 169 case TS_WAIT:
170 170 if (t_run == NULL)
171 171 t_run = t;
172 172 break;
173 173 case TS_ONPROC:
174 174 if (t_onproc == NULL)
175 175 t_onproc = t;
176 176 break;
177 177 case TS_ZOMB: /* last possible choice */
178 178 break;
179 179 case TS_STOPPED:
180 180 switch (t->t_whystop) {
181 181 case PR_SUSPENDED:
182 182 if (t_susp == NULL)
183 183 t_susp = t;
184 184 break;
185 185 case PR_JOBCONTROL:
186 186 if (t->t_proc_flag & TP_PRSTOP) {
187 187 if (t_jdstop == NULL)
188 188 t_jdstop = t;
189 189 } else {
190 190 if (t_jstop == NULL)
191 191 t_jstop = t;
192 192 }
193 193 break;
|
↓ open down ↓ |
159 lines elided |
↑ open up ↑ |
194 194 case PR_REQUESTED:
195 195 if (t->t_dtrace_stop && t_dtrace == NULL)
196 196 t_dtrace = t;
197 197 else if (t_req == NULL)
198 198 t_req = t;
199 199 break;
200 200 case PR_SYSENTRY:
201 201 case PR_SYSEXIT:
202 202 case PR_SIGNALLED:
203 203 case PR_FAULTED:
204 + case PR_BRAND:
204 205 /*
205 206 * Make an lwp calling exit() be the
206 207 * last lwp seen in the process.
207 208 */
208 209 if (t_istop == NULL ||
209 210 (t_istop->t_whystop == PR_SYSENTRY &&
210 211 t_istop->t_whatstop == SYS_exit))
211 212 t_istop = t;
212 213 break;
213 214 case PR_CHECKPOINT: /* can't happen? */
214 215 break;
215 216 default:
216 217 panic("prchoose: bad t_whystop %d, thread 0x%p",
217 218 t->t_whystop, (void *)t);
218 219 /*NOTREACHED*/
219 220 }
220 221 break;
221 222 }
222 223 thread_unlock(t);
223 224 } while ((t = t->t_forw) != p->p_tlist);
224 225
225 226 if (t_onproc)
226 227 t = t_onproc;
227 228 else if (t_run)
228 229 t = t_run;
229 230 else if (t_sleep)
230 231 t = t_sleep;
231 232 else if (t_jstop)
232 233 t = t_jstop;
233 234 else if (t_jdstop)
234 235 t = t_jdstop;
235 236 else if (t_istop)
236 237 t = t_istop;
237 238 else if (t_dtrace)
238 239 t = t_dtrace;
239 240 else if (t_req)
240 241 t = t_req;
241 242 else if (t_hold)
242 243 t = t_hold;
243 244 else if (t_susp)
244 245 t = t_susp;
245 246 else /* TS_ZOMB */
246 247 t = p->p_tlist;
247 248
248 249 if (t != NULL)
249 250 thread_lock(t);
250 251 return (t);
251 252 }
252 253
253 254 /*
254 255 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255 256 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256 257 * on the /proc file descriptor. Called from stop() when a traced
257 258 * process stops on an event of interest. Also called from exit()
258 259 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259 260 */
260 261 void
261 262 prnotify(struct vnode *vp)
262 263 {
263 264 prcommon_t *pcp = VTOP(vp)->pr_common;
264 265
265 266 mutex_enter(&pcp->prc_mutex);
266 267 cv_broadcast(&pcp->prc_wait);
267 268 mutex_exit(&pcp->prc_mutex);
268 269 if (pcp->prc_flags & PRC_POLL) {
269 270 /*
270 271 * We call pollwakeup() with POLLHUP to ensure that
271 272 * the pollers are awakened even if they are polling
272 273 * for nothing (i.e., waiting for the process to exit).
273 274 * This enables the use of the PRC_POLL flag for optimization
274 275 * (we can turn off PRC_POLL only if we know no pollers remain).
275 276 */
276 277 pcp->prc_flags &= ~PRC_POLL;
277 278 pollwakeup(&pcp->prc_pollhead, POLLHUP);
278 279 }
279 280 }
280 281
281 282 /* called immediately below, in prfree() */
282 283 static void
283 284 prfreenotify(vnode_t *vp)
284 285 {
285 286 prnode_t *pnp;
286 287 prcommon_t *pcp;
287 288
288 289 while (vp != NULL) {
289 290 pnp = VTOP(vp);
290 291 pcp = pnp->pr_common;
291 292 ASSERT(pcp->prc_thread == NULL);
292 293 pcp->prc_proc = NULL;
293 294 /*
294 295 * We can't call prnotify() here because we are holding
295 296 * pidlock. We assert that there is no need to.
296 297 */
297 298 mutex_enter(&pcp->prc_mutex);
298 299 cv_broadcast(&pcp->prc_wait);
299 300 mutex_exit(&pcp->prc_mutex);
300 301 ASSERT(!(pcp->prc_flags & PRC_POLL));
301 302
302 303 vp = pnp->pr_next;
303 304 pnp->pr_next = NULL;
304 305 }
305 306 }
306 307
307 308 /*
308 309 * Called from a hook in freeproc() when a traced process is removed
309 310 * from the process table. The proc-table pointers of all associated
310 311 * /proc vnodes are cleared to indicate that the process has gone away.
311 312 */
312 313 void
313 314 prfree(proc_t *p)
314 315 {
315 316 uint_t slot = p->p_slot;
316 317
317 318 ASSERT(MUTEX_HELD(&pidlock));
318 319
319 320 /*
320 321 * Block the process against /proc so it can be freed.
321 322 * It cannot be freed while locked by some controlling process.
322 323 * Lock ordering:
323 324 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
324 325 */
325 326 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */
326 327 mutex_enter(&p->p_lock);
327 328 while (p->p_proc_flag & P_PR_LOCK) {
328 329 mutex_exit(&pr_pidlock);
329 330 cv_wait(&pr_pid_cv[slot], &p->p_lock);
330 331 mutex_exit(&p->p_lock);
331 332 mutex_enter(&pr_pidlock);
332 333 mutex_enter(&p->p_lock);
333 334 }
334 335
335 336 ASSERT(p->p_tlist == NULL);
336 337
337 338 prfreenotify(p->p_plist);
338 339 p->p_plist = NULL;
339 340
340 341 prfreenotify(p->p_trace);
341 342 p->p_trace = NULL;
342 343
343 344 /*
344 345 * We broadcast to wake up everyone waiting for this process.
345 346 * No one can reach this process from this point on.
346 347 */
347 348 cv_broadcast(&pr_pid_cv[slot]);
348 349
349 350 mutex_exit(&p->p_lock);
350 351 mutex_exit(&pr_pidlock);
351 352 }
352 353
353 354 /*
354 355 * Called from a hook in exit() when a traced process is becoming a zombie.
355 356 */
356 357 void
357 358 prexit(proc_t *p)
358 359 {
359 360 ASSERT(MUTEX_HELD(&p->p_lock));
360 361
361 362 if (pr_watch_active(p)) {
362 363 pr_free_watchpoints(p);
363 364 watch_disable(curthread);
364 365 }
365 366 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
366 367 if (p->p_trace) {
367 368 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
368 369 prnotify(p->p_trace);
369 370 }
370 371 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */
371 372 }
372 373
373 374 /*
374 375 * Called when a thread calls lwp_exit().
375 376 */
376 377 void
377 378 prlwpexit(kthread_t *t)
378 379 {
379 380 vnode_t *vp;
380 381 prnode_t *pnp;
381 382 prcommon_t *pcp;
382 383 proc_t *p = ttoproc(t);
383 384 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
384 385
385 386 ASSERT(t == curthread);
386 387 ASSERT(MUTEX_HELD(&p->p_lock));
387 388
388 389 /*
389 390 * The process must be blocked against /proc to do this safely.
390 391 * The lwp must not disappear while the process is marked P_PR_LOCK.
391 392 * It is the caller's responsibility to have called prbarrier(p).
392 393 */
393 394 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
394 395
395 396 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
396 397 pnp = VTOP(vp);
397 398 pcp = pnp->pr_common;
398 399 if (pcp->prc_thread == t) {
399 400 pcp->prc_thread = NULL;
400 401 pcp->prc_flags |= PRC_DESTROY;
401 402 }
402 403 }
403 404
404 405 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
405 406 pnp = VTOP(vp);
406 407 pcp = pnp->pr_common;
407 408 pcp->prc_thread = NULL;
408 409 pcp->prc_flags |= PRC_DESTROY;
409 410 prnotify(vp);
410 411 }
411 412
412 413 if (p->p_trace)
413 414 prnotify(p->p_trace);
414 415 }
415 416
416 417 /*
417 418 * Called when a zombie thread is joined or when a
418 419 * detached lwp exits. Called from lwp_hash_out().
419 420 */
420 421 void
421 422 prlwpfree(proc_t *p, lwpent_t *lep)
422 423 {
423 424 vnode_t *vp;
424 425 prnode_t *pnp;
425 426 prcommon_t *pcp;
426 427
427 428 ASSERT(MUTEX_HELD(&p->p_lock));
428 429
429 430 /*
430 431 * The process must be blocked against /proc to do this safely.
431 432 * The lwp must not disappear while the process is marked P_PR_LOCK.
432 433 * It is the caller's responsibility to have called prbarrier(p).
433 434 */
434 435 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
435 436
436 437 vp = lep->le_trace;
437 438 lep->le_trace = NULL;
438 439 while (vp) {
439 440 prnotify(vp);
440 441 pnp = VTOP(vp);
441 442 pcp = pnp->pr_common;
442 443 ASSERT(pcp->prc_thread == NULL &&
443 444 (pcp->prc_flags & PRC_DESTROY));
444 445 pcp->prc_tslot = -1;
445 446 vp = pnp->pr_next;
446 447 pnp->pr_next = NULL;
447 448 }
448 449
449 450 if (p->p_trace)
450 451 prnotify(p->p_trace);
451 452 }
452 453
453 454 /*
454 455 * Called from a hook in exec() when a thread starts exec().
455 456 */
456 457 void
457 458 prexecstart(void)
458 459 {
459 460 proc_t *p = ttoproc(curthread);
460 461 klwp_t *lwp = ttolwp(curthread);
461 462
462 463 /*
463 464 * The P_PR_EXEC flag blocks /proc operations for
464 465 * the duration of the exec().
465 466 * We can't start exec() while the process is
466 467 * locked by /proc, so we call prbarrier().
467 468 * lwp_nostop keeps the process from being stopped
468 469 * via job control for the duration of the exec().
469 470 */
470 471
471 472 ASSERT(MUTEX_HELD(&p->p_lock));
472 473 prbarrier(p);
473 474 lwp->lwp_nostop++;
474 475 p->p_proc_flag |= P_PR_EXEC;
475 476 }
476 477
477 478 /*
478 479 * Called from a hook in exec() when a thread finishes exec().
479 480 * The thread may or may not have succeeded. Some other thread
480 481 * may have beat it to the punch.
481 482 */
482 483 void
483 484 prexecend(void)
484 485 {
485 486 proc_t *p = ttoproc(curthread);
486 487 klwp_t *lwp = ttolwp(curthread);
487 488 vnode_t *vp;
488 489 prnode_t *pnp;
489 490 prcommon_t *pcp;
490 491 model_t model = p->p_model;
491 492 id_t tid = curthread->t_tid;
492 493 int tslot = curthread->t_dslot;
493 494
494 495 ASSERT(MUTEX_HELD(&p->p_lock));
495 496
496 497 lwp->lwp_nostop--;
497 498 if (p->p_flag & SEXITLWPS) {
498 499 /*
499 500 * We are on our way to exiting because some
500 501 * other thread beat us in the race to exec().
501 502 * Don't clear the P_PR_EXEC flag in this case.
502 503 */
503 504 return;
504 505 }
505 506
506 507 /*
507 508 * Wake up anyone waiting in /proc for the process to complete exec().
508 509 */
509 510 p->p_proc_flag &= ~P_PR_EXEC;
510 511 if ((vp = p->p_trace) != NULL) {
511 512 pcp = VTOP(vp)->pr_common;
512 513 mutex_enter(&pcp->prc_mutex);
513 514 cv_broadcast(&pcp->prc_wait);
514 515 mutex_exit(&pcp->prc_mutex);
515 516 for (; vp != NULL; vp = pnp->pr_next) {
516 517 pnp = VTOP(vp);
517 518 pnp->pr_common->prc_datamodel = model;
518 519 }
519 520 }
520 521 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
521 522 /*
522 523 * We dealt with the process common above.
523 524 */
524 525 ASSERT(p->p_trace != NULL);
525 526 pcp = VTOP(vp)->pr_common;
526 527 mutex_enter(&pcp->prc_mutex);
|
↓ open down ↓ |
313 lines elided |
↑ open up ↑ |
527 528 cv_broadcast(&pcp->prc_wait);
528 529 mutex_exit(&pcp->prc_mutex);
529 530 for (; vp != NULL; vp = pnp->pr_next) {
530 531 pnp = VTOP(vp);
531 532 pcp = pnp->pr_common;
532 533 pcp->prc_datamodel = model;
533 534 pcp->prc_tid = tid;
534 535 pcp->prc_tslot = tslot;
535 536 }
536 537 }
538 +
539 + /*
540 + * There may be threads waiting for the flag change blocked behind the
541 + * pr_pid_cv as well.
542 + */
543 + cv_signal(&pr_pid_cv[p->p_slot]);
537 544 }
538 545
539 546 /*
540 547 * Called from a hook in relvm() just before freeing the address space.
541 548 * We free all the watched areas now.
542 549 */
543 550 void
544 551 prrelvm(void)
545 552 {
546 553 proc_t *p = ttoproc(curthread);
547 554
548 555 mutex_enter(&p->p_lock);
549 556 prbarrier(p); /* block all other /proc operations */
550 557 if (pr_watch_active(p)) {
551 558 pr_free_watchpoints(p);
552 559 watch_disable(curthread);
553 560 }
554 561 mutex_exit(&p->p_lock);
555 562 pr_free_watched_pages(p);
556 563 }
557 564
558 565 /*
559 566 * Called from hooks in exec-related code when a traced process
560 567 * attempts to exec(2) a setuid/setgid program or an unreadable
561 568 * file. Rather than fail the exec we invalidate the associated
562 569 * /proc vnodes so that subsequent attempts to use them will fail.
563 570 *
564 571 * All /proc vnodes, except directory vnodes, are retained on a linked
565 572 * list (rooted at p_plist in the process structure) until last close.
566 573 *
567 574 * A controlling process must re-open the /proc files in order to
568 575 * regain control.
569 576 */
570 577 void
571 578 prinvalidate(struct user *up)
572 579 {
573 580 kthread_t *t = curthread;
574 581 proc_t *p = ttoproc(t);
575 582 vnode_t *vp;
576 583 prnode_t *pnp;
577 584 int writers = 0;
578 585
579 586 mutex_enter(&p->p_lock);
580 587 prbarrier(p); /* block all other /proc operations */
581 588
582 589 /*
583 590 * At this moment, there can be only one lwp in the process.
584 591 */
585 592 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
586 593
587 594 /*
588 595 * Invalidate any currently active /proc vnodes.
589 596 */
590 597 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
591 598 pnp = VTOP(vp);
592 599 switch (pnp->pr_type) {
593 600 		case PR_PSINFO: /* these files can be read by anyone */
594 601 case PR_LPSINFO:
595 602 case PR_LWPSINFO:
596 603 case PR_LWPDIR:
597 604 case PR_LWPIDDIR:
598 605 case PR_USAGE:
599 606 case PR_LUSAGE:
600 607 case PR_LWPUSAGE:
601 608 break;
602 609 default:
603 610 pnp->pr_flags |= PR_INVAL;
604 611 break;
605 612 }
606 613 }
607 614 /*
608 615 * Wake up anyone waiting for the process or lwp.
609 616 * p->p_trace is guaranteed to be non-NULL if there
610 617 * are any open /proc files for this process.
611 618 */
612 619 if ((vp = p->p_trace) != NULL) {
613 620 prcommon_t *pcp = VTOP(vp)->pr_pcommon;
614 621
615 622 prnotify(vp);
616 623 /*
617 624 * Are there any writers?
618 625 */
619 626 if ((writers = pcp->prc_writers) != 0) {
620 627 /*
621 628 * Clear the exclusive open flag (old /proc interface).
622 629 * Set prc_selfopens equal to prc_writers so that
623 630 * the next O_EXCL|O_WRITE open will succeed
624 631 * even with existing (though invalid) writers.
625 632 * prclose() must decrement prc_selfopens when
626 633 * the invalid files are closed.
627 634 */
628 635 pcp->prc_flags &= ~PRC_EXCL;
629 636 ASSERT(pcp->prc_selfopens <= writers);
630 637 pcp->prc_selfopens = writers;
631 638 }
632 639 }
633 640 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
634 641 while (vp != NULL) {
635 642 /*
636 643 * We should not invalidate the lwpiddir vnodes,
637 644 * but the necessities of maintaining the old
638 645 * ioctl()-based version of /proc require it.
639 646 */
640 647 pnp = VTOP(vp);
641 648 pnp->pr_flags |= PR_INVAL;
642 649 prnotify(vp);
643 650 vp = pnp->pr_next;
644 651 }
645 652
646 653 /*
647 654 * If any tracing flags are in effect and any vnodes are open for
648 655 * writing then set the requested-stop and run-on-last-close flags.
649 656 * Otherwise, clear all tracing flags.
650 657 */
651 658 t->t_proc_flag &= ~TP_PAUSE;
652 659 if ((p->p_proc_flag & P_PR_TRACE) && writers) {
653 660 t->t_proc_flag |= TP_PRSTOP;
654 661 aston(t); /* so ISSIG will see the flag */
655 662 p->p_proc_flag |= P_PR_RUNLCL;
656 663 } else {
657 664 premptyset(&up->u_entrymask); /* syscalls */
658 665 premptyset(&up->u_exitmask);
659 666 up->u_systrap = 0;
660 667 premptyset(&p->p_sigmask); /* signals */
661 668 premptyset(&p->p_fltmask); /* faults */
662 669 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
663 670 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
664 671 prnostep(ttolwp(t));
665 672 }
666 673
667 674 mutex_exit(&p->p_lock);
668 675 }
669 676
670 677 /*
671 678 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672 679 * Return with pr_pidlock held in all cases.
673 680  * Return with p_lock held if the process still exists.
674 681 * Return value is the process pointer if the process still exists, else NULL.
675 682 * If we lock the process, give ourself kernel priority to avoid deadlocks;
676 683 * this is undone in prunlock().
677 684 */
678 685 proc_t *
679 686 pr_p_lock(prnode_t *pnp)
680 687 {
681 688 proc_t *p;
682 689 prcommon_t *pcp;
683 690
684 691 mutex_enter(&pr_pidlock);
685 692 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
686 693 return (NULL);
687 694 mutex_enter(&p->p_lock);
688 695 while (p->p_proc_flag & P_PR_LOCK) {
689 696 /*
690 697 * This cv/mutex pair is persistent even if
691 698 * the process disappears while we sleep.
692 699 */
693 700 kcondvar_t *cv = &pr_pid_cv[p->p_slot];
694 701 kmutex_t *mp = &p->p_lock;
695 702
696 703 mutex_exit(&pr_pidlock);
697 704 cv_wait(cv, mp);
698 705 mutex_exit(mp);
699 706 mutex_enter(&pr_pidlock);
700 707 if (pcp->prc_proc == NULL)
701 708 return (NULL);
702 709 ASSERT(p == pcp->prc_proc);
703 710 mutex_enter(&p->p_lock);
704 711 }
705 712 p->p_proc_flag |= P_PR_LOCK;
706 713 THREAD_KPRI_REQUEST();
707 714 return (p);
708 715 }
709 716
710 717 /*
711 718 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
712 719 * This prevents any lwp of the process from disappearing and
713 720 * blocks most operations that a process can perform on itself.
714 721 * Returns 0 on success, a non-zero error number on failure.
715 722 *
716 723 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
717 724 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
718 725 *
719 726 * error returns:
720 727 * ENOENT: process or lwp has disappeared or process is exiting
721 728 * (or has become a zombie and zdisp == ZNO).
722 729 * EAGAIN: procfs vnode has become invalid.
723 730 * EINTR: signal arrived while waiting for exec to complete.
724 731 */
725 732 int
726 733 prlock(prnode_t *pnp, int zdisp)
727 734 {
728 735 prcommon_t *pcp;
729 736 proc_t *p;
730 737
731 738 again:
732 739 pcp = pnp->pr_common;
733 740 p = pr_p_lock(pnp);
734 741 mutex_exit(&pr_pidlock);
735 742
736 743 /*
737 744 * Return ENOENT immediately if there is no process.
738 745 */
739 746 if (p == NULL)
740 747 return (ENOENT);
741 748
742 749 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
743 750
744 751 /*
745 752 * Return ENOENT if process entered zombie state or is exiting
746 753 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
747 754 */
748 755 if (zdisp == ZNO &&
749 756 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
750 757 prunlock(pnp);
751 758 return (ENOENT);
752 759 }
753 760
754 761 /*
755 762 * If lwp-specific, check to see if lwp has disappeared.
756 763 */
757 764 if (pcp->prc_flags & PRC_LWP) {
758 765 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
759 766 pcp->prc_tslot == -1) {
760 767 prunlock(pnp);
761 768 return (ENOENT);
762 769 }
763 770 }
764 771
765 772 /*
766 773 * Return EAGAIN if we have encountered a security violation.
767 774 * (The process exec'd a set-id or unreadable executable file.)
768 775 */
769 776 if (pnp->pr_flags & PR_INVAL) {
770 777 prunlock(pnp);
771 778 return (EAGAIN);
772 779 }
773 780
774 781 /*
775 782 * If process is undergoing an exec(), wait for
776 783 * completion and then start all over again.
777 784 */
778 785 if (p->p_proc_flag & P_PR_EXEC) {
779 786 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */
780 787 mutex_enter(&pcp->prc_mutex);
781 788 prunlock(pnp);
782 789 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
783 790 mutex_exit(&pcp->prc_mutex);
784 791 return (EINTR);
785 792 }
786 793 mutex_exit(&pcp->prc_mutex);
787 794 goto again;
788 795 }
789 796
790 797 /*
791 798 * We return holding p->p_lock.
792 799 */
793 800 return (0);
794 801 }
795 802
796 803 /*
797 804 * Undo prlock() and pr_p_lock().
798 805 * p->p_lock is still held; pr_pidlock is no longer held.
799 806 *
800 807 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
801 808 * if any, waiting for the flag to be dropped; it retains p->p_lock.
802 809 *
803 810 * prunlock() calls prunmark() and then drops p->p_lock.
804 811 */
805 812 void
806 813 prunmark(proc_t *p)
807 814 {
808 815 ASSERT(p->p_proc_flag & P_PR_LOCK);
809 816 ASSERT(MUTEX_HELD(&p->p_lock));
810 817
811 818 cv_signal(&pr_pid_cv[p->p_slot]);
812 819 p->p_proc_flag &= ~P_PR_LOCK;
813 820 THREAD_KPRI_RELEASE();
814 821 }
815 822
816 823 void
817 824 prunlock(prnode_t *pnp)
818 825 {
819 826 prcommon_t *pcp = pnp->pr_common;
820 827 proc_t *p = pcp->prc_proc;
821 828
822 829 /*
823 830 * If we (or someone) gave it a SIGKILL, and it is not
824 831 * already a zombie, set it running unconditionally.
825 832 */
826 833 if ((p->p_flag & SKILLED) &&
827 834 !(p->p_flag & SEXITING) &&
828 835 !(pcp->prc_flags & PRC_DESTROY) &&
829 836 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
830 837 (void) pr_setrun(pnp, 0);
831 838 prunmark(p);
832 839 mutex_exit(&p->p_lock);
833 840 }
834 841
835 842 /*
836 843 * Called while holding p->p_lock to delay until the process is unlocked.
837 844 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
838 845 * The process cannot become locked again until p->p_lock is dropped.
839 846 */
840 847 void
841 848 prbarrier(proc_t *p)
842 849 {
843 850 ASSERT(MUTEX_HELD(&p->p_lock));
844 851
845 852 if (p->p_proc_flag & P_PR_LOCK) {
846 853 /* The process is locked; delay until not locked */
847 854 uint_t slot = p->p_slot;
848 855
849 856 while (p->p_proc_flag & P_PR_LOCK)
850 857 cv_wait(&pr_pid_cv[slot], &p->p_lock);
851 858 cv_signal(&pr_pid_cv[slot]);
852 859 }
853 860 }
854 861
855 862 /*
856 863 * Return process/lwp status.
857 864 * The u-block is mapped in by this routine and unmapped at the end.
858 865 */
859 866 void
860 867 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
861 868 {
862 869 kthread_t *t;
863 870
864 871 ASSERT(MUTEX_HELD(&p->p_lock));
865 872
866 873 t = prchoose(p); /* returns locked thread */
867 874 ASSERT(t != NULL);
868 875 thread_unlock(t);
869 876
870 877 /* just bzero the process part, prgetlwpstatus() does the rest */
871 878 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
872 879 sp->pr_nlwp = p->p_lwpcnt;
873 880 sp->pr_nzomb = p->p_zombcnt;
874 881 prassignset(&sp->pr_sigpend, &p->p_sig);
875 882 sp->pr_brkbase = (uintptr_t)p->p_brkbase;
876 883 sp->pr_brksize = p->p_brksize;
877 884 sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
878 885 sp->pr_stksize = p->p_stksize;
879 886 sp->pr_pid = p->p_pid;
880 887 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
881 888 (p->p_flag & SZONETOP)) {
882 889 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
883 890 /*
884 891 * Inside local zones, fake zsched's pid as parent pids for
885 892 * processes which reference processes outside of the zone.
886 893 */
887 894 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
888 895 } else {
889 896 sp->pr_ppid = p->p_ppid;
890 897 }
891 898 sp->pr_pgid = p->p_pgrp;
892 899 sp->pr_sid = p->p_sessp->s_sid;
893 900 sp->pr_taskid = p->p_task->tk_tkid;
894 901 sp->pr_projid = p->p_task->tk_proj->kpj_id;
895 902 sp->pr_zoneid = p->p_zone->zone_id;
896 903 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
897 904 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
898 905 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
899 906 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
900 907 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
901 908 prassignset(&sp->pr_flttrace, &p->p_fltmask);
902 909 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
903 910 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
904 911 switch (p->p_model) {
905 912 case DATAMODEL_ILP32:
906 913 sp->pr_dmodel = PR_MODEL_ILP32;
907 914 break;
908 915 case DATAMODEL_LP64:
909 916 sp->pr_dmodel = PR_MODEL_LP64;
910 917 break;
911 918 }
912 919 if (p->p_agenttp)
913 920 sp->pr_agentid = p->p_agenttp->t_tid;
914 921
915 922 /* get the chosen lwp's status */
916 923 prgetlwpstatus(t, &sp->pr_lwp, zp);
917 924
918 925 /* replicate the flags */
919 926 sp->pr_flags = sp->pr_lwp.pr_flags;
920 927 }
921 928
922 929 #ifdef _SYSCALL32_IMPL
/*
 * Return the lwp status for thread t in ILP32 (_SYSCALL32_IMPL) format.
 * The caller must hold p->p_lock; note that p->p_lock is dropped and
 * reacquired below while the lwp's stack, registers and FP state are
 * examined.
 */
923 930 void
924 931 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
925 932 {
926 933 	proc_t *p = ttoproc(t);
927 934 	klwp_t *lwp = ttolwp(t);
928 935 	struct mstate *ms = &lwp->lwp_mstate;
929 936 	hrtime_t usr, sys;
930 937 	int flags;
931 938 	ulong_t instr;
932 939
933 940 	ASSERT(MUTEX_HELD(&p->p_lock));
934 941
935 942 	bzero(sp, sizeof (*sp));
	/* Summarize the lwp's and process's stop/trace state into pr_flags. */
936 943 	flags = 0L;
937 944 	if (t->t_state == TS_STOPPED) {
938 945 		flags |= PR_STOPPED;
939 946 		if ((t->t_schedflag & TS_PSTART) == 0)
940 947 			flags |= PR_ISTOP;
941 948 	} else if (VSTOPPED(t)) {
942 949 		flags |= PR_STOPPED|PR_ISTOP;
943 950 	}
944 951 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
945 952 		flags |= PR_DSTOP;
946 953 	if (lwp->lwp_asleep)
947 954 		flags |= PR_ASLEEP;
948 955 	if (t == p->p_agenttp)
949 956 		flags |= PR_AGENT;
950 957 	if (!(t->t_proc_flag & TP_TWAIT))
951 958 		flags |= PR_DETACH;
952 959 	if (t->t_proc_flag & TP_DAEMON)
953 960 		flags |= PR_DAEMON;
954 961 	if (p->p_proc_flag & P_PR_FORK)
955 962 		flags |= PR_FORK;
956 963 	if (p->p_proc_flag & P_PR_RUNLCL)
957 964 		flags |= PR_RLC;
958 965 	if (p->p_proc_flag & P_PR_KILLCL)
959 966 		flags |= PR_KLC;
960 967 	if (p->p_proc_flag & P_PR_ASYNC)
961 968 		flags |= PR_ASYNC;
962 969 	if (p->p_proc_flag & P_PR_BPTADJ)
963 970 		flags |= PR_BPTADJ;
964 971 	if (p->p_proc_flag & P_PR_PTRACE)
965 972 		flags |= PR_PTRACE;
966 973 	if (p->p_flag & SMSACCT)
967 974 		flags |= PR_MSACCT;
968 975 	if (p->p_flag & SMSFORK)
969 976 		flags |= PR_MSFORK;
970 977 	if (p->p_flag & SVFWAIT)
971 978 		flags |= PR_VFORKP;
	/*
	 * NOTE(review): unlike prgetlwpstatus(), PR_ORPHAN, PR_NOSIGCHLD
	 * and PR_WAITPID are not reported here -- confirm that omission
	 * is intentional.
	 */
972 979 	sp->pr_flags = flags;
973 980 	if (VSTOPPED(t)) {
974 981 		sp->pr_why = PR_REQUESTED;
975 982 		sp->pr_what = 0;
976 983 	} else {
977 984 		sp->pr_why = t->t_whystop;
978 985 		sp->pr_what = t->t_whatstop;
979 986 	}
980 987 	sp->pr_lwpid = t->t_tid;
981 988 	sp->pr_cursig = lwp->lwp_cursig;
982 989 	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* Complete any schedctl-requested signal block before sampling t_hold. */
983 990 	schedctl_finish_sigblock(t);
984 991 	prassignset(&sp->pr_lwphold, &t->t_hold);
985 992 	if (t->t_whystop == PR_FAULTED) {
986 993 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
987 994 		if (t->t_whatstop == FLTPAGE)
988 995 			sp->pr_info.si_addr =
989 996 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
990 997 	} else if (lwp->lwp_curinfo)
991 998 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * If the signal came from a user in a zone other than the
	 * observer's, substitute zsched's identity so foreign-zone
	 * details do not leak to the observer.
	 */
992 999 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
993 1000 	    sp->pr_info.si_zoneid != zp->zone_id) {
994 1001 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
995 1002 		sp->pr_info.si_uid = 0;
996 1003 		sp->pr_info.si_ctid = -1;
997 1004 		sp->pr_info.si_zoneid = zp->zone_id;
998 1005 	}
999 1006 	sp->pr_altstack.ss_sp =
1000 1007 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1001 1008 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1002 1009 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1003 1010 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1004 1011 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1005 1012 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1006 1013 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1007 1014 	    sizeof (sp->pr_clname) - 1);
1008 1015 	if (flags & PR_STOPPED)
1009 1016 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	/* Scale the raw microstate accounting into user and system times. */
1010 1017 	usr = ms->ms_acct[LMS_USER];
1011 1018 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1012 1019 	scalehrtime(&usr);
1013 1020 	scalehrtime(&sys);
1014 1021 	hrt2ts32(usr, &sp->pr_utime);
1015 1022 	hrt2ts32(sys, &sp->pr_stime);
1016 1023
1017 1024 	/*
1018 1025 	 * Fetch the current instruction, if not a system process.
1019 1026 	 * We don't attempt this unless the lwp is stopped.
1020 1027 	 */
1021 1028 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1022 1029 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1023 1030 	else if (!(flags & PR_STOPPED))
1024 1031 		sp->pr_flags |= PR_PCINVAL;
1025 1032 	else if (!prfetchinstr(lwp, &instr))
1026 1033 		sp->pr_flags |= PR_PCINVAL;
1027 1034 	else
1028 1035 		sp->pr_instr = (uint32_t)instr;
1029 1036
1030 1037 	/*
1031 1038 	 * Drop p_lock while touching the lwp's stack.
1032 1039 	 */
1033 1040 	mutex_exit(&p->p_lock);
1034 1041 	if (prisstep(lwp))
1035 1042 		sp->pr_flags |= PR_STEP;
1036 1043 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1037 1044 		int i;
1038 1045
1039 1046 		sp->pr_syscall = get_syscall32_args(lwp,
1040 1047 		    (int *)sp->pr_sysarg, &i);
1041 1048 		sp->pr_nsysarg = (ushort_t)i;
1042 1049 	}
1043 1050 	if ((flags & PR_STOPPED) || t == curthread)
1044 1051 		prgetprregs32(lwp, sp->pr_reg);
1045 1052 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1046 1053 	    (flags & PR_VFORKP)) {
1047 1054 		long r1, r2;
1048 1055 		user_t *up;
1049 1056 		auxv_t *auxp;
1050 1057 		int i;
1051 1058
1052 1059 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1053 1060 		if (sp->pr_errno == 0) {
1054 1061 			sp->pr_rval1 = (int32_t)r1;
1055 1062 			sp->pr_rval2 = (int32_t)r2;
1056 1063 			sp->pr_errpriv = PRIV_NONE;
1057 1064 		} else
1058 1065 			sp->pr_errpriv = lwp->lwp_badpriv;
1059 1066
		/*
		 * For exec(), report argv/envp from the u-area and recover
		 * the pathname from the AT_SUN_EXECNAME aux vector entry.
		 */
1060 1067 		if (t->t_sysnum == SYS_execve) {
1061 1068 			up = PTOU(p);
1062 1069 			sp->pr_sysarg[0] = 0;
1063 1070 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1064 1071 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1065 1072 			for (i = 0, auxp = up->u_auxv;
1066 1073 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1067 1074 			    i++, auxp++) {
1068 1075 				if (auxp->a_type == AT_SUN_EXECNAME) {
1069 1076 					sp->pr_sysarg[0] =
1070 1077 					    (caddr32_t)
1071 1078 					    (uintptr_t)auxp->a_un.a_ptr;
1072 1079 					break;
1073 1080 				}
1074 1081 			}
1075 1082 		}
1076 1083 	}
1077 1084 	if (prhasfp())
1078 1085 		prgetprfpregs32(lwp, &sp->pr_fpreg);
1079 1086 	mutex_enter(&p->p_lock);
1080 1087 }
1081 1088
/*
 * Return the process status in ILP32 format for p into *sp, using a
 * representative lwp chosen by prchoose().  Caller holds p->p_lock.
 */
1082 1089 void
1083 1090 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1084 1091 {
1085 1092 	kthread_t *t;
1086 1093
1087 1094 	ASSERT(MUTEX_HELD(&p->p_lock));
1088 1095
1089 1096 	t = prchoose(p);	/* returns locked thread */
1090 1097 	ASSERT(t != NULL);
1091 1098 	thread_unlock(t);
1092 1099
1093 1100 	/* just bzero the process part, prgetlwpstatus32() does the rest */
1094 1101 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1095 1102 	sp->pr_nlwp = p->p_lwpcnt;
1096 1103 	sp->pr_nzomb = p->p_zombcnt;
1097 1104 	prassignset(&sp->pr_sigpend, &p->p_sig);
1098 1105 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1099 1106 	sp->pr_brksize = (uint32_t)p->p_brksize;
1100 1107 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1101 1108 	sp->pr_stksize = (uint32_t)p->p_stksize;
1102 1109 	sp->pr_pid = p->p_pid;
1103 1110 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1104 1111 	    (p->p_flag & SZONETOP)) {
1105 1112 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1106 1113 		/*
1107 1114 		 * Inside local zones, fake zsched's pid as parent pids for
1108 1115 		 * processes which reference processes outside of the zone.
1109 1116 		 */
1110 1117 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1111 1118 	} else {
1112 1119 		sp->pr_ppid = p->p_ppid;
1113 1120 	}
1114 1121 	sp->pr_pgid = p->p_pgrp;
1115 1122 	sp->pr_sid = p->p_sessp->s_sid;
1116 1123 	sp->pr_taskid = p->p_task->tk_tkid;
1117 1124 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1118 1125 	sp->pr_zoneid = p->p_zone->zone_id;
	/* Aggregate per-process user/system microstate times. */
1119 1126 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1120 1127 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1121 1128 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1122 1129 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1123 1130 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1124 1131 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1125 1132 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1126 1133 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1127 1134 	switch (p->p_model) {
1128 1135 	case DATAMODEL_ILP32:
1129 1136 		sp->pr_dmodel = PR_MODEL_ILP32;
1130 1137 		break;
1131 1138 	case DATAMODEL_LP64:
1132 1139 		sp->pr_dmodel = PR_MODEL_LP64;
1133 1140 		break;
1134 1141 	}
1135 1142 	if (p->p_agenttp)
1136 1143 		sp->pr_agentid = p->p_agenttp->t_tid;
1137 1144
1138 1145 	/* get the chosen lwp's status */
1139 1146 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1140 1147
1141 1148 	/* replicate the flags */
1142 1149 	sp->pr_flags = sp->pr_lwp.pr_flags;
1143 1150 }
1144 1151 #endif /* _SYSCALL32_IMPL */
1145 1152
1146 1153 /*
1147 1154 * Return lwp status.
1148 1155 */
/*
 * The caller must hold p->p_lock; note that p->p_lock is dropped and
 * reacquired below while the lwp's stack, registers and FP state are
 * examined.
 */
1149 1156 void
1150 1157 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1151 1158 {
1152 1159 	proc_t *p = ttoproc(t);
1153 1160 	klwp_t *lwp = ttolwp(t);
1154 1161 	struct mstate *ms = &lwp->lwp_mstate;
1155 1162 	hrtime_t usr, sys;
1156 1163 	int flags;
1157 1164 	ulong_t instr;
1158 1165
1159 1166 	ASSERT(MUTEX_HELD(&p->p_lock));
1160 1167
1161 1168 	bzero(sp, sizeof (*sp));
	/* Summarize the lwp's and process's stop/trace state into pr_flags. */
1162 1169 	flags = 0L;
1163 1170 	if (t->t_state == TS_STOPPED) {
1164 1171 		flags |= PR_STOPPED;
1165 1172 		if ((t->t_schedflag & TS_PSTART) == 0)
1166 1173 			flags |= PR_ISTOP;
1167 1174 	} else if (VSTOPPED(t)) {
1168 1175 		flags |= PR_STOPPED|PR_ISTOP;
1169 1176 	}
1170 1177 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1171 1178 		flags |= PR_DSTOP;
1172 1179 	if (lwp->lwp_asleep)
1173 1180 		flags |= PR_ASLEEP;
1174 1181 	if (t == p->p_agenttp)
1175 1182 		flags |= PR_AGENT;
1176 1183 	if (!(t->t_proc_flag & TP_TWAIT))
1177 1184 		flags |= PR_DETACH;
1178 1185 	if (t->t_proc_flag & TP_DAEMON)
1179 1186 		flags |= PR_DAEMON;
1180 1187 	if (p->p_proc_flag & P_PR_FORK)
1181 1188 		flags |= PR_FORK;
1182 1189 	if (p->p_proc_flag & P_PR_RUNLCL)
1183 1190 		flags |= PR_RLC;
1184 1191 	if (p->p_proc_flag & P_PR_KILLCL)
1185 1192 		flags |= PR_KLC;
1186 1193 	if (p->p_proc_flag & P_PR_ASYNC)
1187 1194 		flags |= PR_ASYNC;
1188 1195 	if (p->p_proc_flag & P_PR_BPTADJ)
1189 1196 		flags |= PR_BPTADJ;
1190 1197 	if (p->p_proc_flag & P_PR_PTRACE)
1191 1198 		flags |= PR_PTRACE;
1192 1199 	if (p->p_flag & SMSACCT)
1193 1200 		flags |= PR_MSACCT;
1194 1201 	if (p->p_flag & SMSFORK)
1195 1202 		flags |= PR_MSFORK;
1196 1203 	if (p->p_flag & SVFWAIT)
1197 1204 		flags |= PR_VFORKP;
1198 1205 	if (p->p_pgidp->pid_pgorphaned)
1199 1206 		flags |= PR_ORPHAN;
1200 1207 	if (p->p_pidflag & CLDNOSIGCHLD)
1201 1208 		flags |= PR_NOSIGCHLD;
1202 1209 	if (p->p_pidflag & CLDWAITPID)
1203 1210 		flags |= PR_WAITPID;
1204 1211 	sp->pr_flags = flags;
1205 1212 	if (VSTOPPED(t)) {
1206 1213 		sp->pr_why = PR_REQUESTED;
1207 1214 		sp->pr_what = 0;
1208 1215 	} else {
1209 1216 		sp->pr_why = t->t_whystop;
1210 1217 		sp->pr_what = t->t_whatstop;
1211 1218 	}
1212 1219 	sp->pr_lwpid = t->t_tid;
1213 1220 	sp->pr_cursig = lwp->lwp_cursig;
1214 1221 	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* Complete any schedctl-requested signal block before sampling t_hold. */
1215 1222 	schedctl_finish_sigblock(t);
1216 1223 	prassignset(&sp->pr_lwphold, &t->t_hold);
1217 1224 	if (t->t_whystop == PR_FAULTED)
1218 1225 		bcopy(&lwp->lwp_siginfo,
1219 1226 		    &sp->pr_info, sizeof (k_siginfo_t));
1220 1227 	else if (lwp->lwp_curinfo)
1221 1228 		bcopy(&lwp->lwp_curinfo->sq_info,
1222 1229 		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * If the signal came from a user in a zone other than the
	 * observer's, substitute zsched's identity so foreign-zone
	 * details do not leak to the observer.
	 */
1223 1230 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1224 1231 	    sp->pr_info.si_zoneid != zp->zone_id) {
1225 1232 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1226 1233 		sp->pr_info.si_uid = 0;
1227 1234 		sp->pr_info.si_ctid = -1;
1228 1235 		sp->pr_info.si_zoneid = zp->zone_id;
1229 1236 	}
1230 1237 	sp->pr_altstack = lwp->lwp_sigaltstack;
1231 1238 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1232 1239 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1233 1240 	sp->pr_ustack = lwp->lwp_ustack;
1234 1241 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1235 1242 	    sizeof (sp->pr_clname) - 1);
1236 1243 	if (flags & PR_STOPPED)
1237 1244 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* Scale the raw microstate accounting into user and system times. */
1238 1245 	usr = ms->ms_acct[LMS_USER];
1239 1246 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1240 1247 	scalehrtime(&usr);
1241 1248 	scalehrtime(&sys);
1242 1249 	hrt2ts(usr, &sp->pr_utime);
1243 1250 	hrt2ts(sys, &sp->pr_stime);
1244 1251
1245 1252 	/*
1246 1253 	 * Fetch the current instruction, if not a system process.
1247 1254 	 * We don't attempt this unless the lwp is stopped.
1248 1255 	 */
1249 1256 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1250 1257 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1251 1258 	else if (!(flags & PR_STOPPED))
1252 1259 		sp->pr_flags |= PR_PCINVAL;
1253 1260 	else if (!prfetchinstr(lwp, &instr))
1254 1261 		sp->pr_flags |= PR_PCINVAL;
1255 1262 	else
1256 1263 		sp->pr_instr = instr;
1257 1264
1258 1265 	/*
1259 1266 	 * Drop p_lock while touching the lwp's stack.
1260 1267 	 */
1261 1268 	mutex_exit(&p->p_lock);
1262 1269 	if (prisstep(lwp))
1263 1270 		sp->pr_flags |= PR_STEP;
1264 1271 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1265 1272 		int i;
1266 1273
1267 1274 		sp->pr_syscall = get_syscall_args(lwp,
1268 1275 		    (long *)sp->pr_sysarg, &i);
1269 1276 		sp->pr_nsysarg = (ushort_t)i;
1270 1277 	}
1271 1278 	if ((flags & PR_STOPPED) || t == curthread)
1272 1279 		prgetprregs(lwp, sp->pr_reg);
1273 1280 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1274 1281 	    (flags & PR_VFORKP)) {
1275 1282 		user_t *up;
1276 1283 		auxv_t *auxp;
1277 1284 		int i;
1278 1285
1279 1286 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1280 1287 		if (sp->pr_errno == 0)
1281 1288 			sp->pr_errpriv = PRIV_NONE;
1282 1289 		else
1283 1290 			sp->pr_errpriv = lwp->lwp_badpriv;
1284 1291
		/*
		 * For exec(), report argv/envp from the u-area and recover
		 * the pathname from the AT_SUN_EXECNAME aux vector entry.
		 */
1285 1292 		if (t->t_sysnum == SYS_execve) {
1286 1293 			up = PTOU(p);
1287 1294 			sp->pr_sysarg[0] = 0;
1288 1295 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1289 1296 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1290 1297 			for (i = 0, auxp = up->u_auxv;
1291 1298 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1292 1299 			    i++, auxp++) {
1293 1300 				if (auxp->a_type == AT_SUN_EXECNAME) {
1294 1301 					sp->pr_sysarg[0] =
1295 1302 					    (uintptr_t)auxp->a_un.a_ptr;
1296 1303 					break;
1297 1304 				}
1298 1305 			}
1299 1306 		}
1300 1307 	}
1301 1308 	if (prhasfp())
1302 1309 		prgetprfpregs(lwp, &sp->pr_fpreg);
1303 1310 	mutex_enter(&p->p_lock);
1304 1311 }
1305 1312
1306 1313 /*
1307 1314 * Get the sigaction structure for the specified signal. The u-block
1308 1315 * must already have been mapped in by the caller.
1309 1316 */
1310 1317 void
1311 1318 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1312 1319 {
1313 1320 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1314 1321
1315 1322 bzero(sp, sizeof (*sp));
1316 1323
1317 1324 if (sig != 0 && (unsigned)sig < nsig) {
1318 1325 sp->sa_handler = up->u_signal[sig-1];
1319 1326 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1320 1327 if (sigismember(&up->u_sigonstack, sig))
1321 1328 sp->sa_flags |= SA_ONSTACK;
1322 1329 if (sigismember(&up->u_sigresethand, sig))
1323 1330 sp->sa_flags |= SA_RESETHAND;
1324 1331 if (sigismember(&up->u_sigrestart, sig))
1325 1332 sp->sa_flags |= SA_RESTART;
1326 1333 if (sigismember(&p->p_siginfo, sig))
1327 1334 sp->sa_flags |= SA_SIGINFO;
1328 1335 if (sigismember(&up->u_signodefer, sig))
1329 1336 sp->sa_flags |= SA_NODEFER;
1330 1337 if (sig == SIGCLD) {
1331 1338 if (p->p_flag & SNOWAIT)
1332 1339 sp->sa_flags |= SA_NOCLDWAIT;
1333 1340 if ((p->p_flag & SJCTL) == 0)
1334 1341 sp->sa_flags |= SA_NOCLDSTOP;
1335 1342 }
1336 1343 }
1337 1344 }
1338 1345
1339 1346 #ifdef _SYSCALL32_IMPL
1340 1347 void
1341 1348 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1342 1349 {
1343 1350 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1344 1351
1345 1352 bzero(sp, sizeof (*sp));
1346 1353
1347 1354 if (sig != 0 && (unsigned)sig < nsig) {
1348 1355 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1349 1356 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1350 1357 if (sigismember(&up->u_sigonstack, sig))
1351 1358 sp->sa_flags |= SA_ONSTACK;
1352 1359 if (sigismember(&up->u_sigresethand, sig))
1353 1360 sp->sa_flags |= SA_RESETHAND;
1354 1361 if (sigismember(&up->u_sigrestart, sig))
1355 1362 sp->sa_flags |= SA_RESTART;
1356 1363 if (sigismember(&p->p_siginfo, sig))
1357 1364 sp->sa_flags |= SA_SIGINFO;
1358 1365 if (sigismember(&up->u_signodefer, sig))
1359 1366 sp->sa_flags |= SA_NODEFER;
1360 1367 if (sig == SIGCLD) {
1361 1368 if (p->p_flag & SNOWAIT)
1362 1369 sp->sa_flags |= SA_NOCLDWAIT;
1363 1370 if ((p->p_flag & SJCTL) == 0)
1364 1371 sp->sa_flags |= SA_NOCLDSTOP;
1365 1372 }
1366 1373 }
1367 1374 }
1368 1375 #endif /* _SYSCALL32_IMPL */
1369 1376
1370 1377 /*
1371 1378 * Count the number of segments in this process's address space.
1372 1379 */
1373 1380 int
1374 1381 prnsegs(struct as *as, int reserved)
1375 1382 {
1376 1383 int n = 0;
1377 1384 struct seg *seg;
1378 1385
1379 1386 ASSERT(as != &kas && AS_WRITE_HELD(as));
1380 1387
1381 1388 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382 1389 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383 1390 caddr_t saddr, naddr;
1384 1391 void *tmp = NULL;
1385 1392
1386 1393 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387 1394 (void) pr_getprot(seg, reserved, &tmp,
1388 1395 &saddr, &naddr, eaddr);
1389 1396 if (saddr != naddr)
1390 1397 n++;
1391 1398 }
1392 1399
1393 1400 ASSERT(tmp == NULL);
1394 1401 }
1395 1402
1396 1403 return (n);
1397 1404 }
1398 1405
/*
 * Convert a uint32_t to a decimal string without leading zeros.
 * If 'len' exceeds the number of digits, NUL-pad out to 'len' bytes.
 * Returns the number of digits written (excluding any padding).
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 10 digits suffice for 32 bits */
	char *end = s + len;
	int ndigits = 0;
	int i;

	/* Generate digits least-significant first. */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* Emit them in most-significant-first order. */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (ndigits);
}
1427 1434
/*
 * Convert a uint64_t to a decimal string without leading zeros.
 * The result is NOT NUL-terminated.  Returns the digit count.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 20 digits suffice for 64 bits */
	int ndigits = 0;
	int i;

	/* Generate digits least-significant first. */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* Emit them in most-significant-first order. */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1452 1459
/*
 * Construct the /proc "object" directory name for a mapped vnode:
 * "[<fstype>.]<major>.<minor>.<nodeid>", NUL-terminated, into 'name'.
 * The caller supplies 'vattr' with AT_FSID|AT_NODEID already fetched.
 */
1453 1460 void
1454 1461 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1455 1462 {
1456 1463 	char *s = name;
1457 1464 	struct vfs *vfsp;
1458 1465 	struct vfssw *vfsswp;
1459 1466
	/* Prefix with the filesystem type name, when one is registered. */
1460 1467 	if ((vfsp = vp->v_vfsp) != NULL &&
1461 1468 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1462 1469 	    *vfsswp->vsw_name) {
1463 1470 		(void) strcpy(s, vfsswp->vsw_name);
1464 1471 		s += strlen(s);
1465 1472 		*s++ = '.';
1466 1473 	}
1467 1474 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468 1475 	*s++ = '.';
1469 1476 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470 1477 	*s++ = '.';
1471 1478 	s += pr_u64tos(vattr->va_nodeid, s);
1472 1479 	*s++ = '\0';
1473 1480 }
1474 1481
/*
 * Return the segment containing the process brk (heap) area: the
 * segvn segment covering the last byte of [p_brkbase, p_brkbase +
 * p_brksize), provided it has no associated vnode (i.e. is anonymous).
 * Returns NULL if there is no such segment.
 */
1475 1482 struct seg *
1476 1483 break_seg(proc_t *p)
1477 1484 {
1478 1485 	caddr_t addr = p->p_brkbase;
1479 1486 	struct seg *seg;
1480 1487 	struct vnode *vp;
1481 1488
1482 1489 	if (p->p_brksize != 0)
1483 1490 		addr += p->p_brksize - 1;
1484 1491 	seg = as_segat(p->p_as, addr);
	/* A heap segment is segvn-backed with no vnode behind it. */
1485 1492 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1486 1493 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487 1494 		return (seg);
1488 1495 	return (NULL);
1489 1496 }
1490 1497
1491 1498 /*
1492 1499 * Implementation of service functions to handle procfs generic chained
1493 1500 * copyout buffers.
1494 1501 */
1495 1502 typedef struct pr_iobuf_list {
1496 1503 list_node_t piol_link; /* buffer linkage */
1497 1504 size_t piol_size; /* total size (header + data) */
1498 1505 size_t piol_usedsize; /* amount to copy out from this buf */
1499 1506 } piol_t;
1500 1507
/* Allocation unit for a chained buffer (piol_t header plus data). */
1501 1508 #define MAPSIZE (64 * 1024)
/* The item data area starts immediately after the piol_t header. */
1502 1509 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1503 1510
/*
 * Initialize a chained-copyout buffer list whose entries are 'itemsize'
 * bytes each; 'n' is the caller's estimate of the number of items.
 *
 * NOTE(review): MIN(1, n) sizes the initial buffer for at most ONE item,
 * and evaluates to zero (tripping the ASSERT below) when n == 0.  The
 * comment in the body implies a caller-proportional initial size (i.e.
 * MAX(1, n)) may have been intended -- confirm.
 */
1504 1511 void
1505 1512 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1506 1513 {
1507 1514 	piol_t	*iol;
1508 1515 	size_t	initial_size = MIN(1, n) * itemsize;
1509 1516
1510 1517 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1511 1518
1512 1519 	ASSERT(list_head(iolhead) == NULL);
1513 1520 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1514 1521 	ASSERT(initial_size > 0);
1515 1522
1516 1523 	/*
1517 1524 	 * Someone creating chained copyout buffers may ask for less than
1518 1525 	 * MAPSIZE if the amount of data to be buffered is known to be
1519 1526 	 * smaller than that.
1520 1527 	 * But in order to prevent involuntary self-denial of service,
1521 1528 	 * the requested input size is clamped at MAPSIZE.
1522 1529 	 */
1523 1530 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1524 1531 	iol = kmem_alloc(initial_size, KM_SLEEP);
1525 1532 	list_insert_head(iolhead, iol);
1526 1533 	iol->piol_usedsize = 0;
1527 1534 	iol->piol_size = initial_size;
1528 1535 }
1529 1536
/*
 * Return a zeroed 'itemsize'-byte slot from the tail buffer of the
 * list, allocating and chaining a new MAPSIZE buffer when the tail
 * cannot hold another item.
 */
1530 1537 void *
1531 1538 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1532 1539 {
1533 1540 	piol_t	*iol;
1534 1541 	char	*new;
1535 1542
1536 1543 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1537 1544 	ASSERT(list_head(iolhead) != NULL);
1538 1545
1539 1546 	iol = (piol_t *)list_tail(iolhead);
1540 1547
	/* piol_size includes the header, hence the sizeof (*iol) term. */
1541 1548 	if (iol->piol_size <
1542 1549 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1543 1550 		/*
1544 1551 		 * Out of space in the current buffer. Allocate more.
1545 1552 		 */
1546 1553 		piol_t *newiol;
1547 1554
1548 1555 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1549 1556 		newiol->piol_size = MAPSIZE;
1550 1557 		newiol->piol_usedsize = 0;
1551 1558
1552 1559 		list_insert_after(iolhead, iol, newiol);
1553 1560 		iol = list_next(iolhead, iol);
1554 1561 		ASSERT(iol == newiol);
1555 1562 	}
1556 1563 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1557 1564 	iol->piol_usedsize += itemsize;
1558 1565 	bzero(new, itemsize);
1559 1566 	return (new);
1560 1567 }
1561 1568
/*
 * Copy the buffered items out to user address *tgt, advancing *tgt
 * past each buffer's data, and free every buffer in the list.  An
 * incoming error ('errin') or a copyout() failure suppresses further
 * copies, but the buffers are always freed and the list destroyed.
 * Returns the first error encountered (or 0).
 */
1562 1569 int
1563 1570 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1564 1571 {
1565 1572 	int error = errin;
1566 1573 	piol_t	*iol;
1567 1574
1568 1575 	while ((iol = list_head(iolhead)) != NULL) {
1569 1576 		list_remove(iolhead, iol);
1570 1577 		if (!error) {
1571 1578 			if (copyout(PIOL_DATABUF(iol), *tgt,
1572 1579 			    iol->piol_usedsize))
1573 1580 				error = EFAULT;
1574 1581 			*tgt += iol->piol_usedsize;
1575 1582 		}
1576 1583 		kmem_free(iol, iol->piol_size);
1577 1584 	}
1578 1585 	list_destroy(iolhead);
1579 1586
1580 1587 	return (error);
1581 1588 }
1582 1589
/*
 * uiomove() the buffered items to the caller, honoring the initial
 * uio_offset, and free every buffer in the list.  Returns 'errin'
 * or the first uiomove() error encountered.
 */
1583 1590 int
1584 1591 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1585 1592 {
1586 1593 	offset_t off = uiop->uio_offset;
1587 1594 	char *base;
1588 1595 	size_t size;
1589 1596 	piol_t	*iol;
1590 1597 	int error = errin;
1591 1598
1592 1599 	while ((iol = list_head(iolhead)) != NULL) {
1593 1600 		list_remove(iolhead, iol);
1594 1601 		base = PIOL_DATABUF(iol);
1595 1602 		size = iol->piol_usedsize;
1596 1603 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1597 1604 			error = uiomove(base + off, size - off,
1598 1605 			    UIO_READ, uiop);
		/* Consume the starting offset across skipped buffers. */
1599 1606 		off = MAX(0, off - (offset_t)size);
1600 1607 		kmem_free(iol, iol->piol_size);
1601 1608 	}
1602 1609 	list_destroy(iolhead);
1603 1610
1604 1611 	return (error);
1605 1612 }
1606 1613
1607 1614 /*
1608 1615 * Return an array of structures with memory map information.
1609 1616 * We allocate here; the caller must deallocate.
1610 1617 */
1611 1618 int
1612 1619 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1613 1620 {
1614 1621 	struct as *as = p->p_as;
1615 1622 	prmap_t *mp;
1616 1623 	struct seg *seg;
1617 1624 	struct seg *brkseg, *stkseg;
1618 1625 	struct vnode *vp;
1619 1626 	struct vattr vattr;
1620 1627 	uint_t prot;
1621 1628
1622 1629 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1623 1630
1624 1631 	/*
1625 1632 	 * Request an initial buffer size that doesn't waste memory
1626 1633 	 * if the address space has only a small number of segments.
1627 1634 	 */
1628 1635 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1629 1636
1630 1637 	if ((seg = AS_SEGFIRST(as)) == NULL)
1631 1638 		return (0);
1632 1639
	/* Identify the heap and stack segments so they can be flagged below. */
1633 1640 	brkseg = break_seg(p);
1634 1641 	stkseg = as_segat(as, prgetstackbase(p));
1635 1642
1636 1643 	do {
1637 1644 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638 1645 		caddr_t saddr, naddr;
1639 1646 		void *tmp = NULL;
1640 1647
		/* Emit one prmap_t per distinct protection range. */
1641 1648 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642 1649 			prot = pr_getprot(seg, reserved, &tmp,
1643 1650 			    &saddr, &naddr, eaddr);
1644 1651 			if (saddr == naddr)
1645 1652 				continue;
1646 1653
1647 1654 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1648 1655
1649 1656 			mp->pr_vaddr = (uintptr_t)saddr;
1650 1657 			mp->pr_size = naddr - saddr;
1651 1658 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1652 1659 			mp->pr_mflags = 0;
1653 1660 			if (prot & PROT_READ)
1654 1661 				mp->pr_mflags |= MA_READ;
1655 1662 			if (prot & PROT_WRITE)
1656 1663 				mp->pr_mflags |= MA_WRITE;
1657 1664 			if (prot & PROT_EXEC)
1658 1665 				mp->pr_mflags |= MA_EXEC;
1659 1666 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1660 1667 				mp->pr_mflags |= MA_SHARED;
1661 1668 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1662 1669 				mp->pr_mflags |= MA_NORESERVE;
1663 1670 			if (seg->s_ops == &segspt_shmops ||
1664 1671 			    (seg->s_ops == &segvn_ops &&
1665 1672 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1666 1673 				mp->pr_mflags |= MA_ANON;
1667 1674 			if (seg == brkseg)
1668 1675 				mp->pr_mflags |= MA_BREAK;
1669 1676 			else if (seg == stkseg) {
1670 1677 				mp->pr_mflags |= MA_STACK;
1671 1678 				if (reserved) {
					/*
					 * With 'reserved', report the full
					 * page-rounded stack reservation
					 * (p_stk_ctl), not just the mapped
					 * portion.
					 */
1672 1679 					size_t maxstack =
1673 1680 					    ((size_t)p->p_stk_ctl +
1674 1681 					    PAGEOFFSET) & PAGEMASK;
1675 1682 					mp->pr_vaddr =
1676 1683 					    (uintptr_t)prgetstackbase(p) +
1677 1684 					    p->p_stksize - maxstack;
1678 1685 					mp->pr_size = (uintptr_t)naddr -
1679 1686 					    mp->pr_vaddr;
1680 1687 				}
1681 1688 			}
1682 1689 			if (seg->s_ops == &segspt_shmops)
1683 1690 				mp->pr_mflags |= MA_ISM | MA_SHM;
1684 1691 			mp->pr_pagesize = PAGESIZE;
1685 1692
1686 1693 			/*
1687 1694 			 * Manufacture a filename for the "object" directory.
1688 1695 			 */
1689 1696 			vattr.va_mask = AT_FSID|AT_NODEID;
1690 1697 			if (seg->s_ops == &segvn_ops &&
1691 1698 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1692 1699 			    vp != NULL && vp->v_type == VREG &&
1693 1700 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694 1701 				if (vp == p->p_exec)
1695 1702 					(void) strcpy(mp->pr_mapname, "a.out");
1696 1703 				else
1697 1704 					pr_object_name(mp->pr_mapname,
1698 1705 					    vp, &vattr);
1699 1706 			}
1700 1707
1701 1708 			/*
1702 1709 			 * Get the SysV shared memory id, if any.
1703 1710 			 */
1704 1711 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705 1712 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706 1713 			    SHMID_NONE) {
1707 1714 				if (mp->pr_shmid == SHMID_FREE)
1708 1715 					mp->pr_shmid = -1;
1709 1716
1710 1717 				mp->pr_mflags |= MA_SHM;
1711 1718 			} else {
1712 1719 				mp->pr_shmid = -1;
1713 1720 			}
1714 1721 		}
1715 1722 		ASSERT(tmp == NULL);
1716 1723 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1717 1724
1718 1725 	return (0);
1719 1726 }
1720 1727
1721 1728 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 variant of prgetmap(): return memory map information as
 * prmap32_t structures chained onto 'iolhead'; caller deallocates.
 */
1722 1729 int
1723 1730 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1724 1731 {
1725 1732 	struct as *as = p->p_as;
1726 1733 	prmap32_t *mp;
1727 1734 	struct seg *seg;
1728 1735 	struct seg *brkseg, *stkseg;
1729 1736 	struct vnode *vp;
1730 1737 	struct vattr vattr;
1731 1738 	uint_t prot;
1732 1739
1733 1740 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1734 1741
1735 1742 	/*
1736 1743 	 * Request an initial buffer size that doesn't waste memory
1737 1744 	 * if the address space has only a small number of segments.
1738 1745 	 */
1739 1746 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1740 1747
1741 1748 	if ((seg = AS_SEGFIRST(as)) == NULL)
1742 1749 		return (0);
1743 1750
	/* Identify the heap and stack segments so they can be flagged below. */
1744 1751 	brkseg = break_seg(p);
1745 1752 	stkseg = as_segat(as, prgetstackbase(p));
1746 1753
1747 1754 	do {
1748 1755 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749 1756 		caddr_t saddr, naddr;
1750 1757 		void *tmp = NULL;
1751 1758
		/* Emit one prmap32_t per distinct protection range. */
1752 1759 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753 1760 			prot = pr_getprot(seg, reserved, &tmp,
1754 1761 			    &saddr, &naddr, eaddr);
1755 1762 			if (saddr == naddr)
1756 1763 				continue;
1757 1764
1758 1765 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1759 1766
1760 1767 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761 1768 			mp->pr_size = (size32_t)(naddr - saddr);
1762 1769 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1763 1770 			mp->pr_mflags = 0;
1764 1771 			if (prot & PROT_READ)
1765 1772 				mp->pr_mflags |= MA_READ;
1766 1773 			if (prot & PROT_WRITE)
1767 1774 				mp->pr_mflags |= MA_WRITE;
1768 1775 			if (prot & PROT_EXEC)
1769 1776 				mp->pr_mflags |= MA_EXEC;
1770 1777 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1771 1778 				mp->pr_mflags |= MA_SHARED;
1772 1779 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1773 1780 				mp->pr_mflags |= MA_NORESERVE;
1774 1781 			if (seg->s_ops == &segspt_shmops ||
1775 1782 			    (seg->s_ops == &segvn_ops &&
1776 1783 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1777 1784 				mp->pr_mflags |= MA_ANON;
1778 1785 			if (seg == brkseg)
1779 1786 				mp->pr_mflags |= MA_BREAK;
1780 1787 			else if (seg == stkseg) {
1781 1788 				mp->pr_mflags |= MA_STACK;
1782 1789 				if (reserved) {
					/*
					 * With 'reserved', report the full
					 * page-rounded stack reservation
					 * (p_stk_ctl), not just the mapped
					 * portion.
					 */
1783 1790 					size_t maxstack =
1784 1791 					    ((size_t)p->p_stk_ctl +
1785 1792 					    PAGEOFFSET) & PAGEMASK;
1786 1793 					uintptr_t vaddr =
1787 1794 					    (uintptr_t)prgetstackbase(p) +
1788 1795 					    p->p_stksize - maxstack;
1789 1796 					mp->pr_vaddr = (caddr32_t)vaddr;
1790 1797 					mp->pr_size = (size32_t)
1791 1798 					    ((uintptr_t)naddr - vaddr);
1792 1799 				}
1793 1800 			}
1794 1801 			if (seg->s_ops == &segspt_shmops)
1795 1802 				mp->pr_mflags |= MA_ISM | MA_SHM;
1796 1803 			mp->pr_pagesize = PAGESIZE;
1797 1804
1798 1805 			/*
1799 1806 			 * Manufacture a filename for the "object" directory.
1800 1807 			 */
1801 1808 			vattr.va_mask = AT_FSID|AT_NODEID;
1802 1809 			if (seg->s_ops == &segvn_ops &&
1803 1810 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1804 1811 			    vp != NULL && vp->v_type == VREG &&
1805 1812 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806 1813 				if (vp == p->p_exec)
1807 1814 					(void) strcpy(mp->pr_mapname, "a.out");
1808 1815 				else
1809 1816 					pr_object_name(mp->pr_mapname,
1810 1817 					    vp, &vattr);
1811 1818 			}
1812 1819
1813 1820 			/*
1814 1821 			 * Get the SysV shared memory id, if any.
1815 1822 			 */
1816 1823 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817 1824 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818 1825 			    SHMID_NONE) {
1819 1826 				if (mp->pr_shmid == SHMID_FREE)
1820 1827 					mp->pr_shmid = -1;
1821 1828
1822 1829 				mp->pr_mflags |= MA_SHM;
1823 1830 			} else {
1824 1831 				mp->pr_shmid = -1;
1825 1832 			}
1826 1833 		}
1827 1834 		ASSERT(tmp == NULL);
1828 1835 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1829 1836
1830 1837 	return (0);
1831 1838 }
1832 1839 #endif /* _SYSCALL32_IMPL */
1833 1840
1834 1841 /*
1835 1842 * Return the size of the /proc page data file.
1836 1843 */
1837 1844 size_t
1838 1845 prpdsize(struct as *as)
1839 1846 {
1840 1847 	struct seg *seg;
1841 1848 	size_t size;
1842 1849
1843 1850 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1844 1851
1845 1852 	if ((seg = AS_SEGFIRST(as)) == NULL)
1846 1853 		return (0);
1847 1854
	/*
	 * One prpageheader_t, plus a prasmap_t and an 8-byte-aligned
	 * per-page data array for each non-empty protection range.
	 */
1848 1855 	size = sizeof (prpageheader_t);
1849 1856 	do {
1850 1857 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851 1858 		caddr_t saddr, naddr;
1852 1859 		void *tmp = NULL;
1853 1860 		size_t npage;
1854 1861
1855 1862 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856 1863 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857 1864 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858 1865 				size += sizeof (prasmap_t) + round8(npage);
1859 1866 		}
1860 1867 		ASSERT(tmp == NULL);
1861 1868 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1862 1869
1863 1870 	return (size);
1864 1871 }
1865 1872
1866 1873 #ifdef _SYSCALL32_IMPL
/*
 * ILP32 variant of prpdsize(): return the size of the /proc page data
 * file using the 32-bit structure layouts.
 */
1867 1874 size_t
1868 1875 prpdsize32(struct as *as)
1869 1876 {
1870 1877 	struct seg *seg;
1871 1878 	size_t size;
1872 1879
1873 1880 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1874 1881
1875 1882 	if ((seg = AS_SEGFIRST(as)) == NULL)
1876 1883 		return (0);
1877 1884
	/*
	 * One prpageheader32_t, plus a prasmap32_t and an 8-byte-aligned
	 * per-page data array for each non-empty protection range.
	 */
1878 1885 	size = sizeof (prpageheader32_t);
1879 1886 	do {
1880 1887 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881 1888 		caddr_t saddr, naddr;
1882 1889 		void *tmp = NULL;
1883 1890 		size_t npage;
1884 1891
1885 1892 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886 1893 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887 1894 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888 1895 				size += sizeof (prasmap32_t) + round8(npage);
1889 1896 		}
1890 1897 		ASSERT(tmp == NULL);
1891 1898 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1892 1899
1893 1900 	return (size);
1894 1901 }
1895 1902 #endif /* _SYSCALL32_IMPL */
1896 1903
/*
 * Read page data information.
 *
 * Copies out a prpageheader_t followed by, for each mapping run in the
 * target process's address space, a prasmap_t and one byte of HAT
 * reference/modify statistics per page (padded to 8 bytes).  Returns 0
 * on success, E2BIG if the caller's buffer is too small, EINTR if a
 * signal is pending during a retry, or an error from uiomove().
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/* Size the output while holding the address-space lock. */
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	/* Walk every segment, and every protection run within it. */
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* Anonymous: ISM/DISM segment, or segvn with no vnode. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* Per-page ref/mod stats land just after the prasmap_t. */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2045 2052
#ifdef _SYSCALL32_IMPL
/*
 * ILP32 flavor of prpdread(): identical logic, but emits
 * prpageheader32_t/prasmap32_t records with 32-bit addresses and
 * sizes.  See prpdread() for the overall algorithm and error returns.
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/* Size the output while holding the address-space lock. */
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	/* Walk every segment, and every protection run within it. */
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* Anonymous: ISM/DISM segment, or segvn with no vnode. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* Per-page ref/mod stats land just after the prasmap32_t. */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
#endif	/* _SYSCALL32_IMPL */
2193 2200
2194 2201 ushort_t
2195 2202 prgetpctcpu(uint64_t pct)
2196 2203 {
2197 2204 /*
2198 2205 * The value returned will be relevant in the zone of the examiner,
2199 2206 * which may not be the same as the zone which performed the procfs
2200 2207 * mount.
2201 2208 */
2202 2209 int nonline = zone_ncpus_online_get(curproc->p_zone);
2203 2210
2204 2211 /*
2205 2212 * Prorate over online cpus so we don't exceed 100%
2206 2213 */
2207 2214 if (nonline > 1)
2208 2215 pct /= nonline;
2209 2216 pct >>= 16; /* convert to 16-bit scaled integer */
2210 2217 if (pct > 0x8000) /* might happen, due to rounding */
2211 2218 pct = 0x8000;
2212 2219 return ((ushort_t)pct);
2213 2220 }
2214 2221
/*
 * Return information used by ps(1).
 *
 * Fills *psp for process p.  A representative lwp is chosen with
 * prchoose(); if the process has none (a zombie), the embedded
 * lwpsinfo is zeroed and marked SZOMB.  Caller must hold p->p_lock.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/* Preserve pr_lwp; it is filled in below for a live lwp. */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* Credentials are sampled under p_crlock. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* Zombie: no lwps remain; report exit status and SZOMB. */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* Sum usage over the circular lwp list. */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is dropped while taking the as lock. */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}
}
2349 2356
#ifdef _SYSCALL32_IMPL
/*
 * ILP32 flavor of prgetpsinfo(): same field-by-field logic, but fills
 * a psinfo32_t, narrowing times and sizes.  Fields that cannot be
 * represented in 32 bits for an LP64 target are zeroed at the end.
 * Caller must hold p->p_lock.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/* Preserve pr_lwp; it is filled in below for a live lwp. */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* Credentials are sampled under p_crlock. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* Zombie: no lwps remain; report exit status and SZOMB. */
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			/* Sum usage over the circular lwp list. */
			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is dropped while taking the as lock. */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}

#endif	/* _SYSCALL32_IMPL */
2496 2503
2497 2504 void
2498 2505 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2499 2506 {
2500 2507 klwp_t *lwp = ttolwp(t);
2501 2508 sobj_ops_t *sobj;
2502 2509 char c, state;
2503 2510 uint64_t pct;
2504 2511 int retval, niceval;
2505 2512 hrtime_t hrutime, hrstime;
2506 2513
2507 2514 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2508 2515
2509 2516 bzero(psp, sizeof (*psp));
2510 2517
2511 2518 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2512 2519 psp->pr_lwpid = t->t_tid;
2513 2520 psp->pr_addr = (uintptr_t)t;
2514 2521 psp->pr_wchan = (uintptr_t)t->t_wchan;
2515 2522
2516 2523 /* map the thread state enum into a process state enum */
2517 2524 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2518 2525 switch (state) {
2519 2526 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2520 2527 case TS_RUN: state = SRUN; c = 'R'; break;
2521 2528 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2522 2529 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2523 2530 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2524 2531 case TS_WAIT: state = SWAIT; c = 'W'; break;
2525 2532 default: state = 0; c = '?'; break;
2526 2533 }
2527 2534 psp->pr_state = state;
2528 2535 psp->pr_sname = c;
2529 2536 if ((sobj = t->t_sobj_ops) != NULL)
2530 2537 psp->pr_stype = SOBJ_TYPE(sobj);
2531 2538 retval = CL_DONICE(t, NULL, 0, &niceval);
2532 2539 if (retval == 0) {
2533 2540 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2534 2541 psp->pr_nice = niceval + NZERO;
2535 2542 }
2536 2543 psp->pr_syscall = t->t_sysnum;
2537 2544 psp->pr_pri = t->t_pri;
2538 2545 psp->pr_start.tv_sec = t->t_start;
2539 2546 psp->pr_start.tv_nsec = 0L;
2540 2547 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2541 2548 scalehrtime(&hrutime);
2542 2549 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2543 2550 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2544 2551 scalehrtime(&hrstime);
2545 2552 hrt2ts(hrutime + hrstime, &psp->pr_time);
2546 2553 /* compute %cpu for the lwp */
2547 2554 pct = cpu_update_pct(t, gethrtime_unscaled());
2548 2555 psp->pr_pctcpu = prgetpctcpu(pct);
2549 2556 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2550 2557 if (psp->pr_cpu > 99)
2551 2558 psp->pr_cpu = 99;
2552 2559
2553 2560 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2554 2561 sizeof (psp->pr_clname) - 1);
2555 2562 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2556 2563 psp->pr_onpro = t->t_cpu->cpu_id;
2557 2564 psp->pr_bindpro = t->t_bind_cpu;
2558 2565 psp->pr_bindpset = t->t_bind_pset;
2559 2566 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2560 2567 }
2561 2568
#ifdef _SYSCALL32_IMPL
/*
 * ILP32 flavor of prgetlwpsinfo(): fills an lwpsinfo32_t.  Kernel
 * addresses (pr_addr, pr_wchan) cannot be represented in 32 bits and
 * are left zero.  The owning process's p_lock must be held.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *syncobj;
	char sname, pstate;
	uint64_t cpupct;
	int err, nice;
	hrtime_t usr, sys;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/* Map the thread state enum into a process state enum. */
	pstate = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (pstate) {
	case TS_SLEEP:
		pstate = SSLEEP;
		sname = 'S';
		break;
	case TS_RUN:
		pstate = SRUN;
		sname = 'R';
		break;
	case TS_ONPROC:
		pstate = SONPROC;
		sname = 'O';
		break;
	case TS_ZOMB:
		pstate = SZOMB;
		sname = 'Z';
		break;
	case TS_STOPPED:
		pstate = SSTOP;
		sname = 'T';
		break;
	case TS_WAIT:
		pstate = SWAIT;
		sname = 'W';
		break;
	default:
		pstate = 0;
		sname = '?';
		break;
	}
	psp->pr_state = pstate;
	psp->pr_sname = sname;

	syncobj = t->t_sobj_ops;
	if (syncobj != NULL)
		psp->pr_stype = SOBJ_TYPE(syncobj);

	/* Priority/nice are valid only if the scheduling class reports them. */
	err = CL_DONICE(t, NULL, 0, &nice);
	if (err == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = nice + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;

	/* Sum the scaled user and system (incl. trap) microstate times. */
	usr = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&usr);
	sys = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&sys);
	hrt2ts32(usr + sys, &psp->pr_time);

	/* Compute %cpu, and the legacy 0-99 pr_cpu value, for this lwp. */
	cpupct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(cpupct);
	psp->pr_cpu = (psp->pr_pctcpu * 100 + 0x6000) >> 15;
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
#endif	/* _SYSCALL32_IMPL */
2632 2639
#ifdef _SYSCALL32_IMPL

/*
 * Helper macros for translating a native psinfo/lwpsinfo structure `s'
 * into its ILP32 counterpart `d', one field at a time.
 */

/* Copy a field whose representation is identical in both data models. */
#define	PR_COPY_FIELD(s, d, field)	 d->field = s->field

/*
 * Copy a field only when the source process is ILP32; an LP64 value
 * cannot be represented in the 32-bit field and is left zeroed.
 */
#define	PR_COPY_FIELD_ILP32(s, d, field)	\
	if (s->pr_dmodel == PR_MODEL_ILP32) {	\
		d->field = s->field;	\
	}

/* Narrow a timespec to a timespec32. */
#define	PR_COPY_TIMESPEC(s, d, field)	\
	TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);

/* Copy a fixed-size character buffer verbatim. */
#define	PR_COPY_BUF(s, d, field)	\
	bcopy(s->field, d->field, sizeof (d->field));

/* Field deliberately not translated (e.g. kernel addresses). */
#define	PR_IGNORE_FIELD(s, d, field)
2649 2656
/*
 * Translate a native lwpsinfo_t into an lwpsinfo32_t for an ILP32
 * consumer.  Kernel addresses (pr_addr, pr_wchan) cannot be
 * represented in 32 bits and are deliberately left zero.
 */
void
lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
{
	bzero(dest, sizeof (*dest));

	PR_COPY_FIELD(src, dest, pr_flag);
	PR_COPY_FIELD(src, dest, pr_lwpid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_IGNORE_FIELD(src, dest, pr_wchan);
	PR_COPY_FIELD(src, dest, pr_stype);
	PR_COPY_FIELD(src, dest, pr_state);
	PR_COPY_FIELD(src, dest, pr_sname);
	PR_COPY_FIELD(src, dest, pr_nice);
	PR_COPY_FIELD(src, dest, pr_syscall);
	PR_COPY_FIELD(src, dest, pr_oldpri);
	PR_COPY_FIELD(src, dest, pr_cpu);
	PR_COPY_FIELD(src, dest, pr_pri);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_BUF(src, dest, pr_clname);
	PR_COPY_BUF(src, dest, pr_name);
	PR_COPY_FIELD(src, dest, pr_onpro);
	PR_COPY_FIELD(src, dest, pr_bindpro);
	PR_COPY_FIELD(src, dest, pr_bindpset);
	PR_COPY_FIELD(src, dest, pr_lgrp);
}
2676 2683
2677 2684 void
2678 2685 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2679 2686 {
2680 2687 bzero(dest, sizeof (*dest));
2681 2688
2682 2689 PR_COPY_FIELD(src, dest, pr_flag);
2683 2690 PR_COPY_FIELD(src, dest, pr_nlwp);
2684 2691 PR_COPY_FIELD(src, dest, pr_pid);
2685 2692 PR_COPY_FIELD(src, dest, pr_ppid);
2686 2693 PR_COPY_FIELD(src, dest, pr_pgid);
2687 2694 PR_COPY_FIELD(src, dest, pr_sid);
2688 2695 PR_COPY_FIELD(src, dest, pr_uid);
2689 2696 PR_COPY_FIELD(src, dest, pr_euid);
2690 2697 PR_COPY_FIELD(src, dest, pr_gid);
2691 2698 PR_COPY_FIELD(src, dest, pr_egid);
2692 2699 PR_IGNORE_FIELD(src, dest, pr_addr);
2693 2700 PR_COPY_FIELD_ILP32(src, dest, pr_size);
2694 2701 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2695 2702 PR_COPY_FIELD(src, dest, pr_ttydev);
2696 2703 PR_COPY_FIELD(src, dest, pr_pctcpu);
2697 2704 PR_COPY_FIELD(src, dest, pr_pctmem);
2698 2705 PR_COPY_TIMESPEC(src, dest, pr_start);
2699 2706 PR_COPY_TIMESPEC(src, dest, pr_time);
2700 2707 PR_COPY_TIMESPEC(src, dest, pr_ctime);
2701 2708 PR_COPY_BUF(src, dest, pr_fname);
2702 2709 PR_COPY_BUF(src, dest, pr_psargs);
2703 2710 PR_COPY_FIELD(src, dest, pr_wstat);
2704 2711 PR_COPY_FIELD(src, dest, pr_argc);
2705 2712 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2706 2713 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2707 2714 PR_COPY_FIELD(src, dest, pr_dmodel);
2708 2715 PR_COPY_FIELD(src, dest, pr_taskid);
2709 2716 PR_COPY_FIELD(src, dest, pr_projid);
2710 2717 PR_COPY_FIELD(src, dest, pr_nzomb);
2711 2718 PR_COPY_FIELD(src, dest, pr_poolid);
2712 2719 PR_COPY_FIELD(src, dest, pr_contract);
2713 2720 PR_COPY_FIELD(src, dest, pr_poolid);
2714 2721 PR_COPY_FIELD(src, dest, pr_poolid);
2715 2722
2716 2723 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2717 2724 }
2718 2725
2719 2726 #undef PR_COPY_FIELD
2720 2727 #undef PR_COPY_FIELD_ILP32
2721 2728 #undef PR_COPY_TIMESPEC
2722 2729 #undef PR_COPY_BUF
2723 2730 #undef PR_IGNORE_FIELD
2724 2731
2725 2732 #endif /* _SYSCALL32_IMPL */
2726 2733
2727 2734 /*
2728 2735 * This used to get called when microstate accounting was disabled but
2729 2736 * microstate information was requested. Since Microstate accounting is on
2730 2737 * regardless of the proc flags, this simply makes it appear to procfs that
2731 2738 * microstate accounting is on. This is relatively meaningless since you
2732 2739 * can't turn it off, but this is here for the sake of appearances.
2733 2740 */
2734 2741
2735 2742 /*ARGSUSED*/
2736 2743 void
2737 2744 estimate_msacct(kthread_t *t, hrtime_t curtime)
2738 2745 {
2739 2746 proc_t *p;
2740 2747
2741 2748 if (t == NULL)
2742 2749 return;
2743 2750
2744 2751 p = ttoproc(t);
2745 2752 ASSERT(MUTEX_HELD(&p->p_lock));
2746 2753
2747 2754 /*
2748 2755 * A system process (p0) could be referenced if the thread is
2749 2756 * in the process of exiting. Don't turn on microstate accounting
2750 2757 * in that case.
2751 2758 */
2752 2759 if (p->p_flag & SSYS)
2753 2760 return;
2754 2761
2755 2762 /*
2756 2763 * Loop through all the LWPs (kernel threads) in the process.
2757 2764 */
2758 2765 t = p->p_tlist;
2759 2766 do {
2760 2767 t->t_proc_flag |= TP_MSACCT;
2761 2768 } while ((t = t->t_forw) != p->p_tlist);
2762 2769
2763 2770 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2764 2771 }
2765 2772
2766 2773 /*
2767 2774 * It's not really possible to disable microstate accounting anymore.
2768 2775 * However, this routine simply turns off the ms accounting flags in a process
2769 2776 * This way procfs can still pretend to turn microstate accounting on and
2770 2777 * off for a process, but it actually doesn't do anything. This is
2771 2778 * a neutered form of preemptive idiot-proofing.
2772 2779 */
2773 2780 void
2774 2781 disable_msacct(proc_t *p)
2775 2782 {
2776 2783 kthread_t *t;
2777 2784
2778 2785 ASSERT(MUTEX_HELD(&p->p_lock));
2779 2786
2780 2787 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2781 2788 /*
2782 2789 * Loop through all the LWPs (kernel threads) in the process.
2783 2790 */
2784 2791 if ((t = p->p_tlist) != NULL) {
2785 2792 do {
2786 2793 /* clear per-thread flag */
2787 2794 t->t_proc_flag &= ~TP_MSACCT;
2788 2795 } while ((t = t->t_forw) != p->p_tlist);
2789 2796 }
2790 2797 }
2791 2798
/*
 * Return resource usage information for a single LWP.
 *
 * Fills in *pup from the LWP's microstate accounting data, scaling the
 * unscaled high-resolution times to nanoseconds, then adjusts for time
 * spent waiting on a dispatch queue and for the time accumulated so far
 * in the current (still-open) microstate.
 */
void
prgetusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp1;

	curtime = gethrtime_unscaled();

	pup->pr_lwpid = t->t_tid;
	pup->pr_count = 1;
	pup->pr_create = ms->ms_start;
	pup->pr_term = ms->ms_term;
	scalehrtime(&pup->pr_create);
	scalehrtime(&pup->pr_term);
	/* ms_term == 0 means the LWP has not terminated: run time is open. */
	if (ms->ms_term == 0) {
		pup->pr_rtime = curtime - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	} else {
		pup->pr_rtime = ms->ms_term - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	}


	/* Copy the per-microstate accumulators (still unscaled here). */
	pup->pr_utime = ms->ms_acct[LMS_USER];
	pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
	pup->pr_ttime = ms->ms_acct[LMS_TRAP];
	pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
	pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
	pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
	pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
	pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
	pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];

	/* Scale all of the above to nanoseconds in place. */
	prscaleusage(pup);

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		/* t_waitrq can race ahead of our earlier curtime read. */
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp1 = curtime - waitrq;
		scalehrtime(&tmp1);
		pup->pr_wtime += tmp1;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * The unsynchronized reads above can yield a negative interval;
	 * retry (re-reading the clock) up to MAX_ITERS_SPIN times until
	 * the time since the current microstate began is non-negative.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		/* Map the effective microstate to its accumulator field. */
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("prgetusage: unknown microstate");
		}
		tmp1 = curtime - ms->ms_state_start;
		if (tmp1 < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp1);
	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp1;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf = lwp->lwp_ru.minflt;
	pup->pr_majf = lwp->lwp_ru.majflt;
	pup->pr_nswap = lwp->lwp_ru.nswap;
	pup->pr_inblk = lwp->lwp_ru.inblock;
	pup->pr_oublk = lwp->lwp_ru.oublock;
	pup->pr_msnd = lwp->lwp_ru.msgsnd;
	pup->pr_mrcv = lwp->lwp_ru.msgrcv;
	pup->pr_sigs = lwp->lwp_ru.nsignals;
	pup->pr_vctx = lwp->lwp_ru.nvcsw;
	pup->pr_ictx = lwp->lwp_ru.nivcsw;
	pup->pr_sysc = lwp->lwp_ru.sysc;
	pup->pr_ioch = lwp->lwp_ru.ioch;
}
2927 2934
2928 2935 /*
2929 2936 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2930 2937 */
2931 2938 void
2932 2939 prscaleusage(prhusage_t *usg)
2933 2940 {
2934 2941 scalehrtime(&usg->pr_utime);
2935 2942 scalehrtime(&usg->pr_stime);
2936 2943 scalehrtime(&usg->pr_ttime);
2937 2944 scalehrtime(&usg->pr_tftime);
2938 2945 scalehrtime(&usg->pr_dftime);
2939 2946 scalehrtime(&usg->pr_kftime);
2940 2947 scalehrtime(&usg->pr_ltime);
2941 2948 scalehrtime(&usg->pr_slptime);
2942 2949 scalehrtime(&usg->pr_wtime);
2943 2950 scalehrtime(&usg->pr_stoptime);
2944 2951 }
2945 2952
2946 2953
/*
 * Sum resource usage information.
 *
 * Like prgetusage(), but accumulates LWP t's usage into *pup rather
 * than overwriting it, so callers can total usage across all LWPs of
 * a process.  The same clock-race adjustments apply.
 */
void
praddusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp;
	prhusage_t conv;

	curtime = gethrtime_unscaled();

	/* ms_term == 0 means the LWP has not terminated: run time is open. */
	if (ms->ms_term == 0) {
		tmp = curtime - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	} else {
		tmp = ms->ms_term - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	}

	/*
	 * Scale this LWP's accumulators in a scratch struct so they can
	 * be added to the running totals in *pup.
	 */
	conv.pr_utime = ms->ms_acct[LMS_USER];
	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(&conv);

	pup->pr_utime += conv.pr_utime;
	pup->pr_stime += conv.pr_stime;
	pup->pr_ttime += conv.pr_ttime;
	pup->pr_tftime += conv.pr_tftime;
	pup->pr_dftime += conv.pr_dftime;
	pup->pr_kftime += conv.pr_kftime;
	pup->pr_ltime += conv.pr_ltime;
	pup->pr_slptime += conv.pr_slptime;
	pup->pr_wtime += conv.pr_wtime;
	pup->pr_stoptime += conv.pr_stoptime;

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		/* t_waitrq can race ahead of our earlier curtime read. */
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp = curtime - waitrq;
		scalehrtime(&tmp);
		pup->pr_wtime += tmp;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * The unsynchronized reads above can yield a negative interval;
	 * retry (re-reading the clock) up to MAX_ITERS_SPIN times until
	 * the time since the current microstate began is non-negative.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		/* Map the effective microstate to its accumulator field. */
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("praddusage: unknown microstate");
		}
		tmp = curtime - ms->ms_state_start;
		if (tmp < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp);
	} while (tmp < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf += lwp->lwp_ru.minflt;
	pup->pr_majf += lwp->lwp_ru.majflt;
	pup->pr_nswap += lwp->lwp_ru.nswap;
	pup->pr_inblk += lwp->lwp_ru.inblock;
	pup->pr_oublk += lwp->lwp_ru.oublock;
	pup->pr_msnd += lwp->lwp_ru.msgsnd;
	pup->pr_mrcv += lwp->lwp_ru.msgrcv;
	pup->pr_sigs += lwp->lwp_ru.nsignals;
	pup->pr_vctx += lwp->lwp_ru.nvcsw;
	pup->pr_ictx += lwp->lwp_ru.nivcsw;
	pup->pr_sysc += lwp->lwp_ru.sysc;
	pup->pr_ioch += lwp->lwp_ru.ioch;
}
3089 3096
/*
 * Convert a prhusage_t to a prusage_t.
 * This means convert each hrtime_t to a timestruc_t
 * and copy the count fields uint64_t => ulong_t.
 */
void
prcvtusage(prhusage_t *pup, prusage_t *upup)
{
	uint64_t *ullp;
	ulong_t *ulp;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts(pup->pr_create, &upup->pr_create);
	hrt2ts(pup->pr_term, &upup->pr_term);
	hrt2ts(pup->pr_rtime, &upup->pr_rtime);
	hrt2ts(pup->pr_utime, &upup->pr_utime);
	hrt2ts(pup->pr_stime, &upup->pr_stime);
	hrt2ts(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy the 22 counter slots starting at pr_minf by walking both
	 * structs in parallel.  NOTE(review): this assumes the counters
	 * (pr_minf through pr_ioch plus trailing pad) are contiguous and
	 * identically ordered in both structures — confirm against the
	 * prusage/prhusage definitions before changing either layout.
	 */
	ullp = &pup->pr_minf;
	ulp = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		*ulp++ = (ulong_t)*ullp++;
}
3126 3133
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prcvtusage(): convert a prhusage_t to a prusage32_t,
 * narrowing each hrtime_t to a 32-bit timestruc and each counter to
 * uint32_t for ILP32 consumers.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *ullp;
	uint32_t *ulp;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts32(pup->pr_create, &upup->pr_create);
	hrt2ts32(pup->pr_term, &upup->pr_term);
	hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
	hrt2ts32(pup->pr_utime, &upup->pr_utime);
	hrt2ts32(pup->pr_stime, &upup->pr_stime);
	hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy the 22 counter slots starting at pr_minf, narrowing each
	 * to 32 bits.  NOTE(review): same contiguous-layout assumption
	 * as prcvtusage() — confirm against the struct definitions.
	 */
	ullp = &pup->pr_minf;
	ulp = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		*ulp++ = (uint32_t)*ullp++;
}
#endif	/* _SYSCALL32_IMPL */
3160 3167
3161 3168 /*
3162 3169 * Determine whether a set is empty.
3163 3170 */
3164 3171 int
3165 3172 setisempty(uint32_t *sp, uint_t n)
3166 3173 {
3167 3174 while (n--)
3168 3175 if (*sp++)
3169 3176 return (0);
3170 3177 return (1);
3171 3178 }
3172 3179
/*
 * Utility routine for establishing a watched area in the process.
 * Keep the list of watched areas sorted by virtual address.
 *
 * Ownership of pwa transfers to this routine: it is either inserted
 * into p->p_warea or freed before returning.  Returns 0 on success,
 * EINVAL for a partial overlap with an existing area, or an error
 * from set_watched_page().
 */
int
set_watched_area(proc_t *p, struct watched_area *pwa)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	ulong_t flags = pwa->wa_flags;
	struct watched_area *target;
	avl_index_t where;
	int error = 0;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * If this is our first watchpoint, enable watchpoints for the process.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		/* p_tlist is circular; enable watching on every LWP. */
		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_enable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	target = pr_find_watched_area(p, pwa, &where);
	if (target != NULL) {
		/*
		 * We discovered an existing, overlapping watched area.
		 * Allow it only if it is an exact match.
		 */
		if (target->wa_vaddr != vaddr ||
		    target->wa_eaddr != eaddr)
			error = EINVAL;
		else if (target->wa_flags != flags) {
			/* Same span, new flags: update the watched pages. */
			error = set_watched_page(p, vaddr, eaddr,
			    flags, target->wa_flags);
			target->wa_flags = flags;
		}
		/* The existing area stays in the tree; drop the new one. */
		kmem_free(pwa, sizeof (struct watched_area));
	} else {
		/* No overlap: insert pwa at the position avl_find gave us. */
		avl_insert(&p->p_warea, pwa, where);
		error = set_watched_page(p, vaddr, eaddr, flags, 0);
	}

	return (error);
}
3228 3235
/*
 * Utility routine for clearing a watched area in the process.
 * Must be an exact match of the virtual address.
 * size and flags don't matter.
 *
 * pwa is only a lookup key and is always freed here.  Always returns 0;
 * a missing match is not an error.
 */
int
clear_watched_area(proc_t *p, struct watched_area *pwa)
{
	struct watched_area *found;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);


	/* Nothing is being watched; nothing to clear. */
	if (!pr_watch_active(p)) {
		kmem_free(pwa, sizeof (struct watched_area));
		return (0);
	}

	/*
	 * Look for a matching address in the watched areas. If a match is
	 * found, clear the old watched area and adjust the watched page(s). It
	 * is not an error if there is no match.
	 */
	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
	    found->wa_vaddr == pwa->wa_vaddr) {
		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
		    found->wa_flags);
		avl_remove(&p->p_warea, found);
		kmem_free(found, sizeof (struct watched_area));
	}

	kmem_free(pwa, sizeof (struct watched_area));

	/*
	 * If we removed the last watched area from the process, disable
	 * watchpoints.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		/* p_tlist is circular; disable watching on every LWP. */
		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	return (0);
}
3282 3289
3283 3290 /*
3284 3291 * Frees all the watched_area structures
3285 3292 */
3286 3293 void
3287 3294 pr_free_watchpoints(proc_t *p)
3288 3295 {
3289 3296 struct watched_area *delp;
3290 3297 void *cookie;
3291 3298
3292 3299 cookie = NULL;
3293 3300 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3294 3301 kmem_free(delp, sizeof (struct watched_area));
3295 3302
3296 3303 avl_destroy(&p->p_warea);
3297 3304 }
3298 3305
3299 3306 /*
3300 3307 * This one is called by the traced process to unwatch all the
3301 3308 * pages while deallocating the list of watched_page structs.
3302 3309 */
3303 3310 void
3304 3311 pr_free_watched_pages(proc_t *p)
3305 3312 {
3306 3313 struct as *as = p->p_as;
3307 3314 struct watched_page *pwp;
3308 3315 uint_t prot;
3309 3316 int retrycnt, err;
3310 3317 void *cookie;
3311 3318
3312 3319 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313 3320 return;
3314 3321
3315 3322 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316 3323 AS_LOCK_ENTER(as, RW_WRITER);
3317 3324
3318 3325 pwp = avl_first(&as->a_wpage);
3319 3326
3320 3327 cookie = NULL;
3321 3328 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322 3329 retrycnt = 0;
3323 3330 if ((prot = pwp->wp_oprot) != 0) {
3324 3331 caddr_t addr = pwp->wp_vaddr;
3325 3332 struct seg *seg;
3326 3333 retry:
3327 3334
3328 3335 if ((pwp->wp_prot != prot ||
3329 3336 (pwp->wp_flags & WP_NOWATCH)) &&
3330 3337 (seg = as_segat(as, addr)) != NULL) {
3331 3338 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332 3339 if (err == IE_RETRY) {
3333 3340 ASSERT(retrycnt == 0);
3334 3341 retrycnt++;
3335 3342 goto retry;
3336 3343 }
3337 3344 }
3338 3345 }
3339 3346 kmem_free(pwp, sizeof (struct watched_page));
3340 3347 }
3341 3348
3342 3349 avl_destroy(&as->a_wpage);
3343 3350 p->p_wprot = NULL;
3344 3351
3345 3352 AS_LOCK_EXIT(as);
3346 3353 }
3347 3354
/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 *
 * Returns 0 on success or E2BIG if the per-process watched-page limit
 * (prnwatch) would be exceeded.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
	ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		/* Chain the pre-allocated entries through wp_list. */
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 */
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	/* Enforce the watched-page limit; free unused pre-allocations. */
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		/* No watched page covers vaddr yet; consume one from the list. */
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	/* Drop the reference counts held under the old flags, if any. */
	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	/* Take references under the new flags. */
	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		/* First time watching this page: capture original protections. */
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			/*
			 * Compute the reduced protections implied by the
			 * watch counts; a read or exec watch removes all
			 * access so any reference will fault.
			 */
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			/* Queue the page on p_wprot for a deferred setprot. */
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}
3480 3487
/*
 * Remove a watched area from the list of watched pages.
 * A watched area may extend over more than one page.
 *
 * Drops the reference counts implied by flags on every watched page in
 * [vaddr, eaddr) and recomputes each page's effective protections,
 * queueing changed pages on p_wprot for a deferred setprot.
 */
static void
clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_tree_t *tree;
	avl_index_t where;

	AS_LOCK_ENTER(as, RW_WRITER);

	/* During vfork wait, watched pages live on the process, not the as. */
	if (p->p_flag & SVFWAIT)
		tree = &p->p_wpage;
	else
		tree = &as->a_wpage;

	/* Start from the first watched page at or after the rounded vaddr. */
	tpw.wp_vaddr = vaddr =
	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	pwp = avl_find(tree, &tpw, &where);
	if (pwp == NULL)
		pwp = avl_nearest(tree, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(vaddr <= pwp->wp_vaddr);

		/* Drop the reference counts held under flags. */
		if (flags & WA_READ)
			pwp->wp_read--;
		if (flags & WA_WRITE)
			pwp->wp_write--;
		if (flags & WA_EXEC)
			pwp->wp_exec--;

		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
			/*
			 * Reset the hat layer's protections on this page.
			 */
			if (pwp->wp_oprot != 0) {
				uint_t prot = pwp->wp_oprot;

				if (pwp->wp_read)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (pwp->wp_write)
					prot &= ~PROT_WRITE;
				if (pwp->wp_exec)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				/* Queue for deferred setprot if changed. */
				if (!(pwp->wp_flags & WP_NOWATCH) &&
				    pwp->wp_prot != prot &&
				    (pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
				pwp->wp_prot = (uchar_t)prot;
			}
		} else {
			/*
			 * No watched areas remain in this page.
			 * Reset everything to normal.
			 */
			if (pwp->wp_oprot != 0) {
				pwp->wp_prot = pwp->wp_oprot;
				if ((pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
			}
		}

		pwp = AVL_NEXT(tree, pwp);
	}

	AS_LOCK_EXIT(as);
}
3561 3568
3562 3569 /*
3563 3570 * Return the original protections for the specified page.
3564 3571 */
3565 3572 static void
3566 3573 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3567 3574 {
3568 3575 struct watched_page *pwp;
3569 3576 struct watched_page tpw;
3570 3577
3571 3578 ASSERT(AS_LOCK_HELD(as));
3572 3579
3573 3580 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3574 3581 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3575 3582 *prot = pwp->wp_oprot;
3576 3583 }
3577 3584
3578 3585 static prpagev_t *
3579 3586 pr_pagev_create(struct seg *seg, int check_noreserve)
3580 3587 {
3581 3588 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3582 3589 size_t total_pages = seg_pages(seg);
3583 3590
3584 3591 /*
3585 3592 * Limit the size of our vectors to pagev_lim pages at a time. We need
3586 3593 * 4 or 5 bytes of storage per page, so this means we limit ourself
3587 3594 * to about a megabyte of kernel heap by default.
3588 3595 */
3589 3596 pagev->pg_npages = MIN(total_pages, pagev_lim);
3590 3597 pagev->pg_pnbase = 0;
3591 3598
3592 3599 pagev->pg_protv =
3593 3600 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3594 3601
3595 3602 if (check_noreserve)
3596 3603 pagev->pg_incore =
3597 3604 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3598 3605 else
3599 3606 pagev->pg_incore = NULL;
3600 3607
3601 3608 return (pagev);
3602 3609 }
3603 3610
3604 3611 static void
3605 3612 pr_pagev_destroy(prpagev_t *pagev)
3606 3613 {
3607 3614 if (pagev->pg_incore != NULL)
3608 3615 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3609 3616
3610 3617 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3611 3618 kmem_free(pagev, sizeof (prpagev_t));
3612 3619 }
3613 3620
/*
 * Fill the page vector with protection (and optionally incore) data for
 * the pages of seg starting at addr, stopping at eaddr or at the vector
 * capacity.  When incore checking is enabled, pages with no backing
 * store are skipped; the returned address is the first backed page
 * found (or eaddr if none).
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	/* Clamp the query length to the end of the range or the vector. */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here: We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory. If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
3674 3681
/*
 * Find the extent of the current homogeneous mapping: starting from
 * *saddrp, locate the first backed page and its protections (*protp),
 * then advance until the protections change, a MAP_NORESERVE hole is
 * hit, or eaddr is reached.  On return *saddrp is the mapping's start
 * and the returned address is its (exclusive) end.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
	caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev. If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				/* Ran out of range: no mapping, prot 0. */
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				/* Vector exhausted; refill from addr on. */
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn. If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing. If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
3767 3774
3768 3775 size_t
3769 3776 pr_getsegsize(struct seg *seg, int reserved)
3770 3777 {
3771 3778 size_t size = seg->s_size;
3772 3779
3773 3780 /*
3774 3781 * If we're interested in the reserved space, return the size of the
3775 3782 * segment itself. Everything else in this function is a special case
3776 3783 * to determine the actual underlying size of various segment types.
3777 3784 */
3778 3785 if (reserved)
3779 3786 return (size);
3780 3787
3781 3788 /*
3782 3789 * If this is a segvn mapping of a regular file, return the smaller
3783 3790 * of the segment size and the remaining size of the file beyond
3784 3791 * the file offset corresponding to seg->s_base.
3785 3792 */
3786 3793 if (seg->s_ops == &segvn_ops) {
3787 3794 vattr_t vattr;
3788 3795 vnode_t *vp;
3789 3796
3790 3797 vattr.va_mask = AT_SIZE;
3791 3798
3792 3799 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3793 3800 vp != NULL && vp->v_type == VREG &&
3794 3801 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3795 3802
3796 3803 u_offset_t fsize = vattr.va_size;
3797 3804 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3798 3805
3799 3806 if (fsize < offset)
3800 3807 fsize = 0;
3801 3808 else
3802 3809 fsize -= offset;
3803 3810
3804 3811 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3805 3812
3806 3813 if (fsize < (u_offset_t)size)
3807 3814 size = (size_t)fsize;
3808 3815 }
3809 3816
3810 3817 return (size);
3811 3818 }
3812 3819
3813 3820 /*
3814 3821 * If this is an ISM shared segment, don't include pages that are
3815 3822 * beyond the real size of the spt segment that backs it.
3816 3823 */
3817 3824 if (seg->s_ops == &segspt_shmops)
3818 3825 return (MIN(spt_realsize(seg), size));
3819 3826
3820 3827 /*
3821 3828 * If this is segment is a mapping from /dev/null, then this is a
3822 3829 * reservation of virtual address space and has no actual size.
3823 3830 * Such segments are backed by segdev and have type set to neither
3824 3831 * MAP_SHARED nor MAP_PRIVATE.
3825 3832 */
3826 3833 if (seg->s_ops == &segdev_ops &&
3827 3834 ((SEGOP_GETTYPE(seg, seg->s_base) &
3828 3835 (MAP_SHARED | MAP_PRIVATE)) == 0))
3829 3836 return (0);
3830 3837
3831 3838 /*
3832 3839 * If this segment doesn't match one of the special types we handle,
3833 3840 * just return the size of the segment itself.
3834 3841 */
3835 3842 return (size);
3836 3843 }
3837 3844
3838 3845 uint_t
3839 3846 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840 3847 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 3848 {
3842 3849 struct as *as = seg->s_as;
3843 3850
3844 3851 caddr_t saddr = *saddrp;
3845 3852 caddr_t naddr;
3846 3853
3847 3854 int check_noreserve;
3848 3855 uint_t prot;
3849 3856
3850 3857 union {
3851 3858 struct segvn_data *svd;
3852 3859 struct segdev_data *sdp;
3853 3860 void *data;
3854 3861 } s;
3855 3862
3856 3863 s.data = seg->s_data;
3857 3864
3858 3865 ASSERT(AS_WRITE_HELD(as));
3859 3866 ASSERT(saddr >= seg->s_base && saddr < eaddr);
3860 3867 ASSERT(eaddr <= seg->s_base + seg->s_size);
3861 3868
3862 3869 /*
3863 3870 * Don't include MAP_NORESERVE pages in the address range
3864 3871 * unless their mappings have actually materialized.
3865 3872 * We cheat by knowing that segvn is the only segment
3866 3873 * driver that supports MAP_NORESERVE.
3867 3874 */
3868 3875 check_noreserve =
3869 3876 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3870 3877 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3871 3878 (s.svd->flags & MAP_NORESERVE));
3872 3879
3873 3880 /*
3874 3881 * Examine every page only as a last resort. We use guilty knowledge
3875 3882 * of segvn and segdev to avoid this: if there are no per-page
3876 3883 * protections present in the segment and we don't care about
3877 3884 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3878 3885 */
3879 3886 if (!check_noreserve && saddr == seg->s_base &&
3880 3887 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3881 3888 prot = s.svd->prot;
3882 3889 getwatchprot(as, saddr, &prot);
3883 3890 naddr = eaddr;
3884 3891
3885 3892 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3886 3893 s.sdp != NULL && s.sdp->pageprot == 0) {
3887 3894 prot = s.sdp->prot;
3888 3895 getwatchprot(as, saddr, &prot);
3889 3896 naddr = eaddr;
3890 3897
3891 3898 } else {
3892 3899 prpagev_t *pagev;
3893 3900
3894 3901 /*
3895 3902 * If addr is sitting at the start of the segment, then
3896 3903 * create a page vector to store protection and incore
3897 3904 * information for pages in the segment, and fill it.
3898 3905 * Otherwise, we expect *tmp to address the prpagev_t
3899 3906 * allocated by a previous call to this function.
3900 3907 */
3901 3908 if (saddr == seg->s_base) {
3902 3909 pagev = pr_pagev_create(seg, check_noreserve);
3903 3910 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3904 3911
3905 3912 ASSERT(*tmp == NULL);
3906 3913 *tmp = pagev;
3907 3914
3908 3915 ASSERT(saddr <= eaddr);
3909 3916 *saddrp = saddr;
3910 3917
3911 3918 if (saddr == eaddr) {
3912 3919 naddr = saddr;
3913 3920 prot = 0;
3914 3921 goto out;
3915 3922 }
3916 3923
3917 3924 } else {
3918 3925 ASSERT(*tmp != NULL);
3919 3926 pagev = (prpagev_t *)*tmp;
3920 3927 }
3921 3928
3922 3929 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3923 3930 ASSERT(naddr <= eaddr);
3924 3931 }
3925 3932
3926 3933 out:
3927 3934 if (naddr == eaddr)
3928 3935 pr_getprot_done(tmp);
3929 3936 *naddrp = naddr;
3930 3937 return (prot);
3931 3938 }
3932 3939
3933 3940 void
3934 3941 pr_getprot_done(void **tmp)
3935 3942 {
3936 3943 if (*tmp != NULL) {
3937 3944 pr_pagev_destroy((prpagev_t *)*tmp);
3938 3945 *tmp = NULL;
3939 3946 }
3940 3947 }
3941 3948
3942 3949 /*
3943 3950 * Return true iff the vnode is a /proc file from the object directory.
3944 3951 */
3945 3952 int
3946 3953 pr_isobject(vnode_t *vp)
3947 3954 {
3948 3955 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3949 3956 }
3950 3957
3951 3958 /*
3952 3959 * Return true iff the vnode is a /proc file opened by the process itself.
3953 3960 */
3954 3961 int
3955 3962 pr_isself(vnode_t *vp)
3956 3963 {
3957 3964 /*
3958 3965 * XXX: To retain binary compatibility with the old
3959 3966 * ioctl()-based version of /proc, we exempt self-opens
3960 3967 * of /proc/<pid> from being marked close-on-exec.
3961 3968 */
3962 3969 return (vn_matchops(vp, prvnodeops) &&
3963 3970 (VTOP(vp)->pr_flags & PR_ISSELF) &&
3964 3971 VTOP(vp)->pr_type != PR_PIDDIR);
3965 3972 }
3966 3973
3967 3974 static ssize_t
3968 3975 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3969 3976 {
3970 3977 ssize_t pagesize, hatsize;
3971 3978
3972 3979 ASSERT(AS_WRITE_HELD(seg->s_as));
3973 3980 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3974 3981 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3975 3982 ASSERT(saddr < eaddr);
3976 3983
3977 3984 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3978 3985 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3979 3986 ASSERT(pagesize != 0);
3980 3987
3981 3988 if (pagesize == -1)
3982 3989 pagesize = PAGESIZE;
3983 3990
3984 3991 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3985 3992
3986 3993 while (saddr < eaddr) {
3987 3994 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3988 3995 break;
3989 3996 ASSERT(IS_P2ALIGNED(saddr, pagesize));
3990 3997 saddr += pagesize;
3991 3998 }
3992 3999
3993 4000 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
3994 4001 return (hatsize);
3995 4002 }
3996 4003
/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 *
 * Walks every segment of p's address space, splitting each segment at
 * protection boundaries (pr_getprot) and then at HAT page-size boundaries
 * (pr_getpagesize), emitting one prxmap_t per resulting range onto
 * iolhead.  Caller must hold the address space lock as writer.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Identified so each range can be tagged MA_BREAK / MA_STACK. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;	/* pr_getprot() page-vector cookie */
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				/*
				 * Anonymous memory: ISM, or a segvn range
				 * with no backing vnode.
				 */
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				/* -1 from pr_getpagesize() means unmapped. */
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Count resident, anonymous and locked pages
				 * in this range via a per-page incore vector.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		/* pr_getprot() frees its cookie once the walk hits eaddr. */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
4145 4152
/*
 * Return the process's credentials.  We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	/* p_crlock keeps p_cred stable while cred2prcred() snapshots it. */
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}
4157 4164
/*
 * Compute actual size of the prpriv_t structure.
 */

size_t
prgetprivsize(void)
{
	/*
	 * Delegated to the privilege framework; NOTE(review): presumably a
	 * NULL argument requests sizing for the system's current privilege
	 * sets — confirm against priv_prgetprivsize().
	 */
	return (priv_prgetprivsize(NULL));
}
4167 4174
/*
 * Return the process's privileges.  We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	/* p_crlock keeps p_cred stable while cred2prpriv() snapshots it. */
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}
4179 4186
#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with HAT memory map information.
 * We allocate here; the caller must deallocate.
 *
 * 32-bit counterpart of prgetxmap(): identical walk over the address
 * space, but emits prxmap32_t entries (32-bit addresses/sizes, compressed
 * device numbers).  Caller must hold the address space lock as writer.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Identified so each range can be tagged MA_BREAK / MA_STACK. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;	/* pr_getprot() page-vector cookie */
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				/*
				 * Anonymous memory: ISM, or a segvn range
				 * with no backing vnode.
				 */
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				/* -1 from pr_getpagesize() means unmapped. */
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Count resident, anonymous and locked pages
				 * in this range via a per-page incore vector.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		/* pr_getprot() frees its cookie once the walk hits eaddr. */
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif /* _SYSCALL32_IMPL */
|
↓ open down ↓ |
3784 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX