--- old/usr/src/uts/common/vm/vm_as.c
+++ new/usr/src/uts/common/vm/vm_as.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 - * Copyright 2016 Joyent, Inc.
24 + * Copyright 2015, Joyent, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 29 /* All Rights Reserved */
30 30
31 31 /*
32 32 * University Copyright- Copyright (c) 1982, 1986, 1988
33 33 * The Regents of the University of California
34 34 * All Rights Reserved
35 35 *
36 36 * University Acknowledgment- Portions of this document are derived from
37 37 * software developed by the University of California, Berkeley, and its
38 38 * contributors.
39 39 */
40 40
41 41 /*
42 42 * VM - address spaces.
43 43 */
44 44
45 45 #include <sys/types.h>
46 46 #include <sys/t_lock.h>
47 47 #include <sys/param.h>
48 48 #include <sys/errno.h>
49 49 #include <sys/systm.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/sysmacros.h>
52 52 #include <sys/cpuvar.h>
53 53 #include <sys/sysinfo.h>
54 54 #include <sys/kmem.h>
55 55 #include <sys/vnode.h>
56 56 #include <sys/vmsystm.h>
57 57 #include <sys/cmn_err.h>
58 58 #include <sys/debug.h>
59 59 #include <sys/tnf_probe.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/ddi.h>
62 62
63 63 #include <vm/hat.h>
64 64 #include <vm/as.h>
65 65 #include <vm/seg.h>
66 66 #include <vm/seg_vn.h>
67 67 #include <vm/seg_dev.h>
68 68 #include <vm/seg_kmem.h>
69 69 #include <vm/seg_map.h>
70 70 #include <vm/seg_spt.h>
71 71 #include <vm/page.h>
72 72
73 73 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
74 74
75 -ulong_t as_user_seg_limit = 0xffff; /* max segments in an (non-kas) AS */
76 -
77 75 static struct kmem_cache *as_cache;
78 76
79 77 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
80 78 static void as_clearwatchprot(struct as *, caddr_t, size_t);
81 79 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
82 80
83 81
84 82 /*
     85   83  * Verifying the segment lists is very time-consuming; it may not
     86   84  * always be desirable to define VERIFY_SEGLIST when DEBUG is set.
87 85 */
88 86 #ifdef DEBUG
89 87 #define VERIFY_SEGLIST
90 88 int do_as_verify = 0;
91 89 #endif
92 90
93 91 /*
94 92 * Allocate a new callback data structure entry and fill in the events of
95 93 * interest, the address range of interest, and the callback argument.
96 94 * Link the entry on the as->a_callbacks list. A callback entry for the
97 95 * entire address space may be specified with vaddr = 0 and size = -1.
98 96 *
99 97 * CALLERS RESPONSIBILITY: If not calling from within the process context for
100 98 * the specified as, the caller must guarantee persistence of the specified as
101 99 * for the duration of this function (eg. pages being locked within the as
102 100 * will guarantee persistence).
103 101 */
104 102 int
105 103 as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
106 104 caddr_t vaddr, size_t size, int sleepflag)
107 105 {
108 106 struct as_callback *current_head, *cb;
109 107 caddr_t saddr;
110 108 size_t rsize;
111 109
112 110 /* callback function and an event are mandatory */
113 111 if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
114 112 return (EINVAL);
115 113
116 114 /* Adding a callback after as_free has been called is not allowed */
117 115 if (as == &kas)
118 116 return (ENOMEM);
119 117
120 118 /*
    121  119  * vaddr = 0 and size = -1 are used to indicate that the callback range
122 120 * is the entire address space so no rounding is done in that case.
123 121 */
124 122 if (size != -1) {
125 123 saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
126 124 rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
127 125 (size_t)saddr;
128 126 /* check for wraparound */
129 127 if (saddr + rsize < saddr)
130 128 return (ENOMEM);
131 129 } else {
132 130 if (vaddr != 0)
133 131 return (EINVAL);
134 132 saddr = vaddr;
135 133 rsize = size;
136 134 }
137 135
138 136 /* Allocate and initialize a callback entry */
139 137 cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
140 138 if (cb == NULL)
141 139 return (EAGAIN);
142 140
143 141 cb->ascb_func = cb_func;
144 142 cb->ascb_arg = arg;
145 143 cb->ascb_events = events;
146 144 cb->ascb_saddr = saddr;
147 145 cb->ascb_len = rsize;
148 146
149 147 /* Add the entry to the list */
150 148 mutex_enter(&as->a_contents);
151 149 current_head = as->a_callbacks;
152 150 as->a_callbacks = cb;
153 151 cb->ascb_next = current_head;
154 152
155 153 /*
156 154 * The call to this function may lose in a race with
157 155 * a pertinent event - eg. a thread does long term memory locking
158 156 * but before the callback is added another thread executes as_unmap.
159 157 * A broadcast here resolves that.
160 158 */
161 159 if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
162 160 AS_CLRUNMAPWAIT(as);
163 161 cv_broadcast(&as->a_cv);
164 162 }
165 163
166 164 mutex_exit(&as->a_contents);
167 165 return (0);
168 166 }
169 167
170 168 /*
171 169 * Search the callback list for an entry which pertains to arg.
172 170 *
173 171 * This is called from within the client upon completion of the callback.
174 172 * RETURN VALUES:
175 173 * AS_CALLBACK_DELETED (callback entry found and deleted)
176 174 * AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
177 175 * AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
178 176 * entry will be made in as_do_callbacks)
179 177 *
180 178 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
181 179 * set, it indicates that as_do_callbacks is processing this entry. The
182 180 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
183 181 * to unblock as_do_callbacks, in case it is blocked.
184 182 *
185 183 * CALLERS RESPONSIBILITY: If not calling from within the process context for
186 184 * the specified as, the caller must guarantee persistence of the specified as
187 185 * for the duration of this function (eg. pages being locked within the as
188 186 * will guarantee persistence).
189 187 */
190 188 uint_t
191 189 as_delete_callback(struct as *as, void *arg)
192 190 {
193 191 struct as_callback **prevcb = &as->a_callbacks;
194 192 struct as_callback *cb;
195 193 uint_t rc = AS_CALLBACK_NOTFOUND;
196 194
197 195 mutex_enter(&as->a_contents);
198 196 for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
199 197 if (cb->ascb_arg != arg)
200 198 continue;
201 199
202 200 /*
203 201 * If the events indicate AS_CALLBACK_CALLED, just clear
204 202 * AS_ALL_EVENT in the events field and wakeup the thread
205 203 * that may be waiting in as_do_callbacks. as_do_callbacks
206 204 * will take care of removing this entry from the list. In
207 205 * that case, return AS_CALLBACK_DELETE_DEFERRED. Otherwise
208 206 * (AS_CALLBACK_CALLED not set), just remove it from the
209 207 * list, return the memory and return AS_CALLBACK_DELETED.
210 208 */
211 209 if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
212 210 /* leave AS_CALLBACK_CALLED */
213 211 cb->ascb_events &= ~AS_ALL_EVENT;
214 212 rc = AS_CALLBACK_DELETE_DEFERRED;
215 213 cv_broadcast(&as->a_cv);
216 214 } else {
217 215 *prevcb = cb->ascb_next;
218 216 kmem_free(cb, sizeof (struct as_callback));
219 217 rc = AS_CALLBACK_DELETED;
220 218 }
221 219 break;
222 220 }
223 221 mutex_exit(&as->a_contents);
224 222 return (rc);
225 223 }
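Reviewer note: the contract documented above these two functions (a non-empty event mask is mandatory, vaddr = 0 with size = -1 registers for the whole address space, and as_delete_callback() reports one of three outcomes) is easiest to see from a caller. A minimal sketch, not part of this file: the driver state structure, its fields, and mydrv_unlock_pages() are hypothetical and only illustrate the intended usage.

/*
 * Hypothetical driver that holds pages in `as' locked long term and
 * registers for AS_UNMAP_EVENT so it can release them on demand.
 */
static void
mydrv_as_callback(struct as *as, void *arg, uint_t events)
{
	struct mydrv_state *msp = arg;		/* hypothetical driver state */

	mydrv_unlock_pages(msp);		/* hypothetical unlock routine */
	/*
	 * Deleting the entry clears AS_ALL_EVENT and lets the thread
	 * blocked in as_execute_callback() continue.
	 */
	(void) as_delete_callback(as, msp);
}

static int
mydrv_register(struct as *as, struct mydrv_state *msp)
{
	/* Register for unmap events over the driver's locked range. */
	return (as_add_callback(as, mydrv_as_callback, msp, AS_UNMAP_EVENT,
	    msp->msp_addr, msp->msp_len, KM_SLEEP));
}

static void
mydrv_unregister(struct as *as, struct mydrv_state *msp)
{
	switch (as_delete_callback(as, msp)) {
	case AS_CALLBACK_DELETE_DEFERRED:
		/* as_do_callbacks() still owns the entry; it will free it. */
		break;
	case AS_CALLBACK_DELETED:
	case AS_CALLBACK_NOTFOUND:	/* already gone; that is fine */
		break;
	}
}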
226 224
227 225 /*
228 226 * Searches the as callback list for a matching entry.
229 227 * Returns a pointer to the first matching callback, or NULL if
230 228 * nothing is found.
231 229 * This function never sleeps so it is ok to call it with more
    232  230  * This function never sleeps so it is ok to call it with locks
    233  231  * held in addition to the (required) a_contents mutex.
234 232 * See also comment on as_do_callbacks below.
235 233 */
236 234 static struct as_callback *
237 235 as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
238 236 size_t event_len)
239 237 {
240 238 struct as_callback *cb;
241 239
242 240 ASSERT(MUTEX_HELD(&as->a_contents));
243 241 for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
244 242 /*
245 243 * If the callback has not already been called, then
246 244 * check if events or address range pertains. An event_len
247 245 * of zero means do an unconditional callback.
248 246 */
249 247 if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
250 248 ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
251 249 (event_addr + event_len < cb->ascb_saddr) ||
252 250 (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
253 251 continue;
254 252 }
255 253 break;
256 254 }
257 255 return (cb);
258 256 }
259 257
260 258 /*
261 259 * Executes a given callback and removes it from the callback list for
262 260 * this address space.
263 261 * This function may sleep so the caller must drop all locks except
264 262 * a_contents before calling this func.
265 263 *
266 264 * See also comments on as_do_callbacks below.
267 265 */
268 266 static void
269 267 as_execute_callback(struct as *as, struct as_callback *cb,
270 268 uint_t events)
271 269 {
272 270 struct as_callback **prevcb;
273 271 void *cb_arg;
274 272
275 273 ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
276 274 cb->ascb_events |= AS_CALLBACK_CALLED;
277 275 mutex_exit(&as->a_contents);
278 276 (*cb->ascb_func)(as, cb->ascb_arg, events);
279 277 mutex_enter(&as->a_contents);
280 278 /*
281 279 * the callback function is required to delete the callback
282 280 * when the callback function determines it is OK for
283 281 * this thread to continue. as_delete_callback will clear
284 282 * the AS_ALL_EVENT in the events field when it is deleted.
285 283 * If the callback function called as_delete_callback,
286 284 * events will already be cleared and there will be no blocking.
287 285 */
288 286 while ((cb->ascb_events & events) != 0) {
289 287 cv_wait(&as->a_cv, &as->a_contents);
290 288 }
291 289 /*
292 290 * This entry needs to be taken off the list. Normally, the
293 291 * callback func itself does that, but unfortunately the list
294 292 * may have changed while the callback was running because the
295 293 * a_contents mutex was dropped and someone else other than the
296 294 * callback func itself could have called as_delete_callback,
297 295 * so we have to search to find this entry again. The entry
298 296 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
299 297 */
300 298 cb_arg = cb->ascb_arg;
301 299 prevcb = &as->a_callbacks;
302 300 for (cb = as->a_callbacks; cb != NULL;
303 301 prevcb = &cb->ascb_next, cb = *prevcb) {
304 302 if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
305 303 (cb_arg != cb->ascb_arg)) {
306 304 continue;
307 305 }
308 306 *prevcb = cb->ascb_next;
309 307 kmem_free(cb, sizeof (struct as_callback));
310 308 break;
311 309 }
312 310 }
313 311
314 312 /*
315 313 * Check the callback list for a matching event and intersection of
316 314 * address range. If there is a match invoke the callback. Skip an entry if:
317 315 * - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
    318  316  * - the event is not of interest
    319  317  * - the address range is not of interest
320 318 *
321 319 * An event_len of zero indicates a request for an unconditional callback
322 320 * (regardless of event), only the AS_CALLBACK_CALLED is checked. The
323 321 * a_contents lock must be dropped before a callback, so only one callback
324 322 * can be done before returning. Return -1 (true) if a callback was
325 323 * executed and removed from the list, else return 0 (false).
326 324 *
327 325 * The logically separate parts, i.e. finding a matching callback and
328 326 * executing a given callback have been separated into two functions
329 327 * so that they can be called with different sets of locks held beyond
330 328 * the always-required a_contents. as_find_callback does not sleep so
331 329 * it is ok to call it if more locks than a_contents (i.e. the a_lock
332 330 * rwlock) are held. as_execute_callback on the other hand may sleep
333 331 * so all locks beyond a_contents must be dropped by the caller if one
    334  332  * does not want to end up comatose.
335 333 */
336 334 static int
337 335 as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
338 336 size_t event_len)
339 337 {
340 338 struct as_callback *cb;
341 339
342 340 if ((cb = as_find_callback(as, events, event_addr, event_len))) {
343 341 as_execute_callback(as, cb, events);
344 342 return (-1);
345 343 }
346 344 return (0);
347 345 }
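Reviewer note: the find/execute split described above is consumed in two ways in this file. The "drain everything" form used by as_free() further down is worth spelling out, since it relies on holding only a_contents; a minimal annotated sketch:

static void
drain_callbacks(struct as *as)
{
	mutex_enter(&as->a_contents);
	/*
	 * Only a_contents may be held here: as_execute_callback() can sleep.
	 * Each -1 return means one callback ran and was removed; loop until
	 * 0, i.e. nothing left matches or the remaining entries are already
	 * being processed.
	 */
	while (as->a_callbacks != NULL &&
	    as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;
	mutex_exit(&as->a_contents);
}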
348 346
349 347 /*
350 348 * Search for the segment containing addr. If a segment containing addr
351 349 * exists, that segment is returned. If no such segment exists, and
352 350 * the list spans addresses greater than addr, then the first segment
353 351 * whose base is greater than addr is returned; otherwise, NULL is
354 352 * returned unless tail is true, in which case the last element of the
355 353 * list is returned.
356 354 *
357 355 * a_seglast is used to cache the last found segment for repeated
358 356 * searches to the same addr (which happens frequently).
359 357 */
360 358 struct seg *
361 359 as_findseg(struct as *as, caddr_t addr, int tail)
362 360 {
363 361 struct seg *seg = as->a_seglast;
364 362 avl_index_t where;
365 363
366 364 ASSERT(AS_LOCK_HELD(as));
367 365
368 366 if (seg != NULL &&
369 367 seg->s_base <= addr &&
370 368 addr < seg->s_base + seg->s_size)
371 369 return (seg);
372 370
373 371 seg = avl_find(&as->a_segtree, &addr, &where);
374 372 if (seg != NULL)
375 373 return (as->a_seglast = seg);
376 374
377 375 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
378 376 if (seg == NULL && tail)
379 377 seg = avl_last(&as->a_segtree);
380 378 return (as->a_seglast = seg);
381 379 }
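Reviewer note: the "containing segment, else first segment above addr, else (optionally) the tail" result makes as_findseg() the natural starting point for range walks. A stripped-down sketch of the walk as_unmap() performs later in this file; visit() is a hypothetical per-segment action:

static void
walk_range(struct as *as, caddr_t raddr, caddr_t eaddr)
{
	struct seg *seg, *seg_next;

	ASSERT(AS_LOCK_HELD(as));
	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;			/* past the range; done */
		seg_next = AS_SEGNEXT(as, seg);	/* seg may be destroyed */
		visit(seg);			/* hypothetical per-seg work */
	}
}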
382 380
383 381 #ifdef VERIFY_SEGLIST
384 382 /*
385 383 * verify that the linked list is coherent
386 384 */
387 385 static void
388 386 as_verify(struct as *as)
389 387 {
390 388 struct seg *seg, *seglast, *p, *n;
391 389 uint_t nsegs = 0;
392 390
393 391 if (do_as_verify == 0)
394 392 return;
395 393
396 394 seglast = as->a_seglast;
397 395
398 396 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
399 397 ASSERT(seg->s_as == as);
400 398 p = AS_SEGPREV(as, seg);
401 399 n = AS_SEGNEXT(as, seg);
402 400 ASSERT(p == NULL || p->s_as == as);
403 401 ASSERT(p == NULL || p->s_base < seg->s_base);
404 402 ASSERT(n == NULL || n->s_base > seg->s_base);
405 403 ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
406 404 if (seg == seglast)
407 405 seglast = NULL;
408 406 nsegs++;
409 407 }
410 408 ASSERT(seglast == NULL);
411 409 ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
412 410 }
413 411 #endif /* VERIFY_SEGLIST */
414 412
415 413 /*
416 414 * Add a new segment to the address space. The avl_find()
417 415 * may be expensive so we attempt to use last segment accessed
418 416 * in as_gap() as an insertion point.
419 417 */
420 418 int
421 419 as_addseg(struct as *as, struct seg *newseg)
422 420 {
423 421 struct seg *seg;
424 422 caddr_t addr;
425 423 caddr_t eaddr;
426 424 avl_index_t where;
427 425
428 426 ASSERT(AS_WRITE_HELD(as));
429 427
430 428 as->a_updatedir = 1; /* inform /proc */
431 429 gethrestime(&as->a_updatetime);
432 430
433 431 if (as->a_lastgaphl != NULL) {
434 432 struct seg *hseg = NULL;
435 433 struct seg *lseg = NULL;
436 434
437 435 if (as->a_lastgaphl->s_base > newseg->s_base) {
438 436 hseg = as->a_lastgaphl;
439 437 lseg = AVL_PREV(&as->a_segtree, hseg);
440 438 } else {
441 439 lseg = as->a_lastgaphl;
442 440 hseg = AVL_NEXT(&as->a_segtree, lseg);
443 441 }
444 442
445 443 if (hseg && lseg && lseg->s_base < newseg->s_base &&
446 444 hseg->s_base > newseg->s_base) {
447 445 avl_insert_here(&as->a_segtree, newseg, lseg,
448 446 AVL_AFTER);
449 447 as->a_lastgaphl = NULL;
450 448 as->a_seglast = newseg;
451 449 return (0);
452 450 }
453 451 as->a_lastgaphl = NULL;
454 452 }
455 453
456 454 addr = newseg->s_base;
457 455 eaddr = addr + newseg->s_size;
458 456 again:
459 457
460 458 seg = avl_find(&as->a_segtree, &addr, &where);
461 459
462 460 if (seg == NULL)
463 461 seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
464 462
465 463 if (seg == NULL)
466 464 seg = avl_last(&as->a_segtree);
467 465
468 466 if (seg != NULL) {
469 467 caddr_t base = seg->s_base;
470 468
471 469 /*
472 470 * If top of seg is below the requested address, then
473 471 * the insertion point is at the end of the linked list,
474 472 * and seg points to the tail of the list. Otherwise,
475 473 * the insertion point is immediately before seg.
476 474 */
477 475 if (base + seg->s_size > addr) {
478 476 if (addr >= base || eaddr > base) {
479 477 #ifdef __sparc
480 478 extern struct seg_ops segnf_ops;
481 479
482 480 /*
483 481 * no-fault segs must disappear if overlaid.
484 482 * XXX need new segment type so
485 483 * we don't have to check s_ops
486 484 */
487 485 if (seg->s_ops == &segnf_ops) {
488 486 seg_unmap(seg);
489 487 goto again;
490 488 }
491 489 #endif
492 490 return (-1); /* overlapping segment */
493 491 }
494 492 }
495 493 }
496 494 as->a_seglast = newseg;
497 495 avl_insert(&as->a_segtree, newseg, where);
498 496
499 497 #ifdef VERIFY_SEGLIST
500 498 as_verify(as);
501 499 #endif
502 500 return (0);
503 501 }
504 502
505 503 struct seg *
506 504 as_removeseg(struct as *as, struct seg *seg)
507 505 {
508 506 avl_tree_t *t;
509 507
510 508 ASSERT(AS_WRITE_HELD(as));
511 509
512 510 as->a_updatedir = 1; /* inform /proc */
513 511 gethrestime(&as->a_updatetime);
514 512
515 513 if (seg == NULL)
516 514 return (NULL);
517 515
518 516 t = &as->a_segtree;
519 517 if (as->a_seglast == seg)
520 518 as->a_seglast = NULL;
521 519 as->a_lastgaphl = NULL;
522 520
523 521 /*
524 522 * if this segment is at an address higher than
525 523 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
526 524 */
527 525 if (as->a_lastgap &&
528 526 (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
529 527 as->a_lastgap = AVL_NEXT(t, seg);
530 528
531 529 /*
532 530 * remove the segment from the seg tree
533 531 */
534 532 avl_remove(t, seg);
535 533
536 534 #ifdef VERIFY_SEGLIST
537 535 as_verify(as);
538 536 #endif
539 537 return (seg);
540 538 }
541 539
542 540 /*
543 541 * Find a segment containing addr.
544 542 */
545 543 struct seg *
546 544 as_segat(struct as *as, caddr_t addr)
547 545 {
548 546 struct seg *seg = as->a_seglast;
549 547
550 548 ASSERT(AS_LOCK_HELD(as));
551 549
552 550 if (seg != NULL && seg->s_base <= addr &&
553 551 addr < seg->s_base + seg->s_size)
554 552 return (seg);
555 553
556 554 seg = avl_find(&as->a_segtree, &addr, NULL);
557 555 return (seg);
558 556 }
559 557
560 558 /*
561 559 * Serialize all searches for holes in an address space to
562 560 * prevent two or more threads from allocating the same virtual
563 561 * address range. The address space must not be "read/write"
564 562 * locked by the caller since we may block.
565 563 */
566 564 void
567 565 as_rangelock(struct as *as)
568 566 {
569 567 mutex_enter(&as->a_contents);
570 568 while (AS_ISCLAIMGAP(as))
571 569 cv_wait(&as->a_cv, &as->a_contents);
572 570 AS_SETCLAIMGAP(as);
573 571 mutex_exit(&as->a_contents);
574 572 }
575 573
576 574 /*
577 575 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
578 576 */
579 577 void
580 578 as_rangeunlock(struct as *as)
581 579 {
582 580 mutex_enter(&as->a_contents);
583 581 AS_CLRCLAIMGAP(as);
584 582 cv_signal(&as->a_cv);
585 583 mutex_exit(&as->a_contents);
586 584 }
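Reviewer note: a sketch of the caller pattern these two routines exist for, modeled on the mmap()-style paths that pick a hole and then map into it. The map_addr() signature, segvn_create, and the crargs argument are assumptions for illustration only.

static int
mydrv_map_anywhere(struct as *as, size_t len, offset_t off, uint_t flags,
    struct segvn_crargs *crargs)
{
	caddr_t addr = NULL;
	int error;

	as_rangelock(as);			/* serialize hole searches */
	map_addr(&addr, len, off, 1, flags);	/* choose an unused hole */
	if (addr == NULL) {
		as_rangeunlock(as);
		return (ENOMEM);
	}
	error = as_map(as, addr, len, segvn_create, crargs);
	as_rangeunlock(as);			/* wake any waiting claimants */
	return (error);
}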
587 585
588 586 /*
589 587 * compar segments (or just an address) by segment address range
    590  588  * compare segments (or just an address) by segment address range
591 589 static int
592 590 as_segcompar(const void *x, const void *y)
593 591 {
594 592 struct seg *a = (struct seg *)x;
595 593 struct seg *b = (struct seg *)y;
596 594
597 595 if (a->s_base < b->s_base)
598 596 return (-1);
599 597 if (a->s_base >= b->s_base + b->s_size)
600 598 return (1);
601 599 return (0);
602 600 }
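Reviewer note: callers such as as_segat() and as_addseg() pass a bare &addr as the avl_find() key rather than a template segment. That works because the comparator above only reads s_base from the key (and s_base/s_size from the tree node), and s_base is the first member of struct seg in the illumos seg definition, so a pointer to a caddr_t aliases the start of a seg for comparison purposes; treat that layout detail as an assumption to be checked against vm/seg.h. A stripped-down as_segat() without the a_seglast cache:

static struct seg *
lookup_containing_seg(struct as *as, caddr_t addr)
{
	/*
	 * The key is a pointer to a caddr_t, not to a struct seg;
	 * as_segcompar() only dereferences key->s_base, which lands on
	 * `addr' given the layout assumption noted above.
	 */
	return (avl_find(&as->a_segtree, &addr, NULL));
}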
603 601
604 602
605 603 void
606 604 as_avlinit(struct as *as)
607 605 {
608 606 avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
609 607 offsetof(struct seg, s_tree));
610 608 avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
611 609 offsetof(struct watched_page, wp_link));
612 610 }
613 611
614 612 /*ARGSUSED*/
615 613 static int
616 614 as_constructor(void *buf, void *cdrarg, int kmflags)
617 615 {
618 616 struct as *as = buf;
619 617
620 618 mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
621 619 cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
622 620 rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
623 621 as_avlinit(as);
624 622 return (0);
625 623 }
626 624
627 625 /*ARGSUSED1*/
628 626 static void
629 627 as_destructor(void *buf, void *cdrarg)
630 628 {
631 629 struct as *as = buf;
632 630
633 631 avl_destroy(&as->a_segtree);
634 632 mutex_destroy(&as->a_contents);
635 633 cv_destroy(&as->a_cv);
636 634 rw_destroy(&as->a_lock);
637 635 }
638 636
639 637 void
640 638 as_init(void)
641 639 {
642 640 as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
643 641 as_constructor, as_destructor, NULL, NULL, NULL, 0);
644 642 }
645 643
646 644 /*
647 645 * Allocate and initialize an address space data structure.
648 646 * We call hat_alloc to allow any machine dependent
649 647 * information in the hat structure to be initialized.
650 648 */
651 649 struct as *
652 650 as_alloc(void)
653 651 {
654 652 struct as *as;
655 653
656 654 as = kmem_cache_alloc(as_cache, KM_SLEEP);
657 655
658 656 as->a_flags = 0;
659 657 as->a_vbits = 0;
660 658 as->a_hrm = NULL;
661 659 as->a_seglast = NULL;
662 660 as->a_size = 0;
663 661 as->a_resvsize = 0;
664 662 as->a_updatedir = 0;
665 663 gethrestime(&as->a_updatetime);
666 664 as->a_objectdir = NULL;
667 665 as->a_sizedir = 0;
668 666 as->a_userlimit = (caddr_t)USERLIMIT;
669 667 as->a_lastgap = NULL;
670 668 as->a_lastgaphl = NULL;
671 669 as->a_callbacks = NULL;
672 670 as->a_proc = NULL;
673 671
674 672 AS_LOCK_ENTER(as, RW_WRITER);
675 673 as->a_hat = hat_alloc(as); /* create hat for default system mmu */
676 674 AS_LOCK_EXIT(as);
677 675
678 676 return (as);
679 677 }
680 678
681 679 /*
682 680 * Free an address space data structure.
683 681 * Need to free the hat first and then
684 682 * all the segments on this as and finally
685 683 * the space for the as struct itself.
686 684 */
687 685 void
688 686 as_free(struct as *as)
689 687 {
690 688 struct hat *hat = as->a_hat;
691 689 struct seg *seg, *next;
692 690 boolean_t free_started = B_FALSE;
693 691
694 692 top:
695 693 /*
696 694 * Invoke ALL callbacks. as_do_callbacks will do one callback
697 695 * per call, and not return (-1) until the callback has completed.
698 696 * When as_do_callbacks returns zero, all callbacks have completed.
699 697 */
700 698 mutex_enter(&as->a_contents);
701 699 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
702 700 ;
703 701
704 702 mutex_exit(&as->a_contents);
705 703 AS_LOCK_ENTER(as, RW_WRITER);
706 704
707 705 if (!free_started) {
708 706 free_started = B_TRUE;
709 707 hat_free_start(hat);
710 708 }
711 709 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
712 710 int err;
713 711
714 712 next = AS_SEGNEXT(as, seg);
715 713 retry:
716 714 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
717 715 if (err == EAGAIN) {
718 716 mutex_enter(&as->a_contents);
719 717 if (as->a_callbacks) {
720 718 AS_LOCK_EXIT(as);
721 719 } else if (!AS_ISNOUNMAPWAIT(as)) {
722 720 /*
723 721 * Memory is currently locked. Wait for a
724 722 * cv_signal that it has been unlocked, then
725 723 * try the operation again.
726 724 */
727 725 if (AS_ISUNMAPWAIT(as) == 0)
728 726 cv_broadcast(&as->a_cv);
729 727 AS_SETUNMAPWAIT(as);
730 728 AS_LOCK_EXIT(as);
731 729 while (AS_ISUNMAPWAIT(as))
732 730 cv_wait(&as->a_cv, &as->a_contents);
733 731 } else {
734 732 /*
735 733 * We may have raced with
736 734 * segvn_reclaim()/segspt_reclaim(). In this
737 735 * case clean nounmapwait flag and retry since
738 736 * softlockcnt in this segment may be already
739 737 * 0. We don't drop as writer lock so our
740 738 * number of retries without sleeping should
741 739 * be very small. See segvn_reclaim() for
742 740 * more comments.
743 741 */
744 742 AS_CLRNOUNMAPWAIT(as);
745 743 mutex_exit(&as->a_contents);
746 744 goto retry;
747 745 }
748 746 mutex_exit(&as->a_contents);
749 747 goto top;
750 748 } else {
751 749 /*
752 750 * We do not expect any other error return at this
753 751 * time. This is similar to an ASSERT in seg_unmap()
754 752 */
755 753 ASSERT(err == 0);
756 754 }
757 755 }
758 756 hat_free_end(hat);
759 757 AS_LOCK_EXIT(as);
760 758
761 759 /* /proc stuff */
762 760 ASSERT(avl_numnodes(&as->a_wpage) == 0);
763 761 if (as->a_objectdir) {
764 762 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
765 763 as->a_objectdir = NULL;
766 764 as->a_sizedir = 0;
767 765 }
768 766
769 767 /*
770 768 * Free the struct as back to kmem. Assert it has no segments.
771 769 */
772 770 ASSERT(avl_numnodes(&as->a_segtree) == 0);
773 771 kmem_cache_free(as_cache, as);
774 772 }
775 773
776 774 int
777 775 as_dup(struct as *as, struct proc *forkedproc)
778 776 {
779 777 struct as *newas;
780 778 struct seg *seg, *newseg;
781 779 size_t purgesize = 0;
782 780 int error;
783 781
784 782 AS_LOCK_ENTER(as, RW_WRITER);
785 783 as_clearwatch(as);
786 784 newas = as_alloc();
787 785 newas->a_userlimit = as->a_userlimit;
788 786 newas->a_proc = forkedproc;
789 787
790 788 AS_LOCK_ENTER(newas, RW_WRITER);
791 789
792 790 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
793 791
794 792 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
795 793
796 794 if (seg->s_flags & S_PURGE) {
797 795 purgesize += seg->s_size;
798 796 continue;
799 797 }
800 798
801 799 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
802 800 if (newseg == NULL) {
803 801 AS_LOCK_EXIT(newas);
804 802 as_setwatch(as);
805 803 AS_LOCK_EXIT(as);
806 804 as_free(newas);
807 805 return (-1);
808 806 }
809 807 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
810 808 /*
811 809 * We call seg_free() on the new seg
812 810 * because the segment is not set up
813 811 * completely; i.e. it has no ops.
814 812 */
815 813 as_setwatch(as);
816 814 AS_LOCK_EXIT(as);
817 815 seg_free(newseg);
818 816 AS_LOCK_EXIT(newas);
819 817 as_free(newas);
820 818 return (error);
821 819 }
822 820 newas->a_size += seg->s_size;
823 821 }
824 822 newas->a_resvsize = as->a_resvsize - purgesize;
825 823
826 824 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
827 825
828 826 AS_LOCK_EXIT(newas);
829 827
830 828 as_setwatch(as);
831 829 AS_LOCK_EXIT(as);
832 830 if (error != 0) {
833 831 as_free(newas);
834 832 return (error);
835 833 }
836 834 forkedproc->p_as = newas;
837 835 return (0);
838 836 }
839 837
840 838 /*
841 839 * Handle a ``fault'' at addr for size bytes.
842 840 */
843 841 faultcode_t
844 842 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
845 843 enum fault_type type, enum seg_rw rw)
846 844 {
847 845 struct seg *seg;
848 846 caddr_t raddr; /* rounded down addr */
849 847 size_t rsize; /* rounded up size */
850 848 size_t ssize;
851 849 faultcode_t res = 0;
852 850 caddr_t addrsav;
853 851 struct seg *segsav;
854 852 int as_lock_held;
855 853 klwp_t *lwp = ttolwp(curthread);
856 854 zone_t *zonep = curzone;
857 855
858 856 retry:
859 857 /*
860 858 * Indicate that the lwp is not to be stopped while waiting for a
861 859 * pagefault. This is to avoid deadlock while debugging a process
862 860 * via /proc over NFS (in particular).
863 861 */
864 862 if (lwp != NULL)
865 863 lwp->lwp_nostop++;
866 864
867 865 /*
868 866 * same length must be used when we softlock and softunlock. We
869 867 * don't support softunlocking lengths less than the original length
870 868 * when there is largepage support. See seg_dev.c for more
871 869 * comments.
872 870 */
873 871 switch (type) {
874 872
875 873 case F_SOFTLOCK:
876 874 CPU_STATS_ADD_K(vm, softlock, 1);
877 875 break;
878 876
879 877 case F_SOFTUNLOCK:
880 878 break;
881 879
882 880 case F_PROT:
883 881 CPU_STATS_ADD_K(vm, prot_fault, 1);
884 882 break;
885 883
886 884 case F_INVAL:
887 885 CPU_STATS_ENTER_K();
888 886 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
889 887 if (as == &kas)
890 888 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
891 889 CPU_STATS_EXIT_K();
892 890 if (zonep->zone_pg_flt_delay != 0) {
893 891 /*
894 - * The zone in which this process is running is
    895        - * currently over its physical memory cap. Throttle
896 - * page faults to help the user-land memory capper
897 - * catch up. Note that drv_usectohz() rounds up.
892 + * The zone in which this process is running
         893 + * is currently over its physical memory cap.
894 + * Throttle page faults to help the user-land
895 + * memory capper catch up. Note that
896 + * drv_usectohz() rounds up.
898 897 */
899 898 atomic_add_64(&zonep->zone_pf_throttle, 1);
900 899 atomic_add_64(&zonep->zone_pf_throttle_usec,
901 900 zonep->zone_pg_flt_delay);
902 - if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1)) {
901 + if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1))
903 902 drv_usecwait(zonep->zone_pg_flt_delay);
904 - } else {
903 + else
905 904 delay(drv_usectohz(zonep->zone_pg_flt_delay));
906 - }
907 905 }
908 906 break;
909 907 }
910 908
911 909 /* Kernel probe */
912 910 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
913 911 tnf_opaque, address, addr,
914 912 tnf_fault_type, fault_type, type,
915 913 tnf_seg_access, access, rw);
916 914
917 915 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
918 916 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
919 917 (size_t)raddr;
920 918
921 919 /*
922 920 * XXX -- Don't grab the as lock for segkmap. We should grab it for
923 921 * correctness, but then we could be stuck holding this lock for
924 922 * a LONG time if the fault needs to be resolved on a slow
925 923 * filesystem, and then no-one will be able to exec new commands,
926 924 * as exec'ing requires the write lock on the as.
927 925 */
928 926 if (as == &kas && segkmap && segkmap->s_base <= raddr &&
929 927 raddr + size < segkmap->s_base + segkmap->s_size) {
930 928 seg = segkmap;
931 929 as_lock_held = 0;
932 930 } else {
933 931 AS_LOCK_ENTER(as, RW_READER);
934 932
935 933 seg = as_segat(as, raddr);
936 934 if (seg == NULL) {
937 935 AS_LOCK_EXIT(as);
938 936 if (lwp != NULL)
939 937 lwp->lwp_nostop--;
940 938 return (FC_NOMAP);
941 939 }
942 940
943 941 as_lock_held = 1;
944 942 }
945 943
946 944 addrsav = raddr;
947 945 segsav = seg;
948 946
949 947 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
950 948 if (raddr >= seg->s_base + seg->s_size) {
951 949 seg = AS_SEGNEXT(as, seg);
952 950 if (seg == NULL || raddr != seg->s_base) {
953 951 res = FC_NOMAP;
954 952 break;
955 953 }
956 954 }
957 955 if (raddr + rsize > seg->s_base + seg->s_size)
958 956 ssize = seg->s_base + seg->s_size - raddr;
959 957 else
960 958 ssize = rsize;
961 959
962 960 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
963 961 if (res != 0)
964 962 break;
965 963 }
966 964
967 965 /*
968 966 * If we were SOFTLOCKing and encountered a failure,
969 967 * we must SOFTUNLOCK the range we already did. (Maybe we
970 968 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
971 969 * right here...)
972 970 */
973 971 if (res != 0 && type == F_SOFTLOCK) {
974 972 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
975 973 if (addrsav >= seg->s_base + seg->s_size)
976 974 seg = AS_SEGNEXT(as, seg);
977 975 ASSERT(seg != NULL);
978 976 /*
979 977 * Now call the fault routine again to perform the
980 978 * unlock using S_OTHER instead of the rw variable
981 979 * since we never got a chance to touch the pages.
982 980 */
983 981 if (raddr > seg->s_base + seg->s_size)
984 982 ssize = seg->s_base + seg->s_size - addrsav;
985 983 else
986 984 ssize = raddr - addrsav;
987 985 (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
988 986 F_SOFTUNLOCK, S_OTHER);
989 987 }
990 988 }
991 989 if (as_lock_held)
992 990 AS_LOCK_EXIT(as);
993 991 if (lwp != NULL)
994 992 lwp->lwp_nostop--;
995 993
996 994 /*
997 995 * If the lower levels returned EDEADLK for a fault,
    998  996  * it means that we should retry the fault. Let's wait
    999  997  * a bit also to let the deadlock-causing condition clear.
1000 998 * This is part of a gross hack to work around a design flaw
1001 999 * in the ufs/sds logging code and should go away when the
1002 1000 * logging code is re-designed to fix the problem. See bug
1003 1001 * 4125102 for details of the problem.
1004 1002 */
1005 1003 if (FC_ERRNO(res) == EDEADLK) {
1006 1004 delay(deadlk_wait);
1007 1005 res = 0;
1008 1006 goto retry;
1009 1007 }
1010 1008 return (res);
1011 1009 }
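Reviewer note: every entry point in this file normalizes its arguments with the same rounding pair, raddr = addr & PAGEMASK and rsize = ((addr + size + PAGEOFFSET) & PAGEMASK) - raddr. A worked case with 4 KB pages (PAGEOFFSET = 0xFFF, PAGEMASK = ~0xFFF): addr = 0x12F00 and size = 0x200 end at 0x13100, which rounds up to 0x14000, while raddr = 0x12000, so rsize = 0x2000, i.e. exactly the two pages the unaligned range touches. The wraparound check (raddr + rsize < raddr) used by several of these routines then rejects requests whose rounded end overflows the address space.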
1012 1010
1013 1011
1014 1012
1015 1013 /*
1016 1014 * Asynchronous ``fault'' at addr for size bytes.
1017 1015 */
1018 1016 faultcode_t
1019 1017 as_faulta(struct as *as, caddr_t addr, size_t size)
1020 1018 {
1021 1019 struct seg *seg;
1022 1020 caddr_t raddr; /* rounded down addr */
1023 1021 size_t rsize; /* rounded up size */
1024 1022 faultcode_t res = 0;
1025 1023 klwp_t *lwp = ttolwp(curthread);
1026 1024
1027 1025 retry:
1028 1026 /*
1029 1027 * Indicate that the lwp is not to be stopped while waiting
1030 1028 * for a pagefault. This is to avoid deadlock while debugging
1031 1029 * a process via /proc over NFS (in particular).
1032 1030 */
1033 1031 if (lwp != NULL)
1034 1032 lwp->lwp_nostop++;
1035 1033
1036 1034 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1037 1035 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1038 1036 (size_t)raddr;
1039 1037
1040 1038 AS_LOCK_ENTER(as, RW_READER);
1041 1039 seg = as_segat(as, raddr);
1042 1040 if (seg == NULL) {
1043 1041 AS_LOCK_EXIT(as);
1044 1042 if (lwp != NULL)
1045 1043 lwp->lwp_nostop--;
1046 1044 return (FC_NOMAP);
1047 1045 }
1048 1046
1049 1047 for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1050 1048 if (raddr >= seg->s_base + seg->s_size) {
1051 1049 seg = AS_SEGNEXT(as, seg);
1052 1050 if (seg == NULL || raddr != seg->s_base) {
1053 1051 res = FC_NOMAP;
1054 1052 break;
1055 1053 }
1056 1054 }
1057 1055 res = SEGOP_FAULTA(seg, raddr);
1058 1056 if (res != 0)
1059 1057 break;
1060 1058 }
1061 1059 AS_LOCK_EXIT(as);
1062 1060 if (lwp != NULL)
1063 1061 lwp->lwp_nostop--;
1064 1062 /*
1065 1063 * If the lower levels returned EDEADLK for a fault,
   1066 1064  * it means that we should retry the fault. Let's wait
   1067 1065  * a bit also to let the deadlock-causing condition clear.
1068 1066 * This is part of a gross hack to work around a design flaw
1069 1067 * in the ufs/sds logging code and should go away when the
1070 1068 * logging code is re-designed to fix the problem. See bug
1071 1069 * 4125102 for details of the problem.
1072 1070 */
1073 1071 if (FC_ERRNO(res) == EDEADLK) {
1074 1072 delay(deadlk_wait);
1075 1073 res = 0;
1076 1074 goto retry;
1077 1075 }
1078 1076 return (res);
1079 1077 }
1080 1078
1081 1079 /*
1082 1080 * Set the virtual mapping for the interval from [addr : addr + size)
1083 1081 * in address space `as' to have the specified protection.
1084 1082 * It is ok for the range to cross over several segments,
1085 1083 * as long as they are contiguous.
1086 1084 */
1087 1085 int
1088 1086 as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1089 1087 {
1090 1088 struct seg *seg;
1091 1089 struct as_callback *cb;
1092 1090 size_t ssize;
1093 1091 caddr_t raddr; /* rounded down addr */
1094 1092 size_t rsize; /* rounded up size */
1095 1093 int error = 0, writer = 0;
1096 1094 caddr_t saveraddr;
1097 1095 size_t saversize;
1098 1096
1099 1097 setprot_top:
1100 1098 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1101 1099 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1102 1100 (size_t)raddr;
1103 1101
1104 1102 if (raddr + rsize < raddr) /* check for wraparound */
1105 1103 return (ENOMEM);
1106 1104
1107 1105 saveraddr = raddr;
1108 1106 saversize = rsize;
1109 1107
1110 1108 /*
1111 1109 * Normally we only lock the as as a reader. But
1112 1110 * if due to setprot the segment driver needs to split
1113 1111 * a segment it will return IE_RETRY. Therefore we re-acquire
1114 1112 * the as lock as a writer so the segment driver can change
1115 1113 * the seg list. Also the segment driver will return IE_RETRY
1116 1114 * after it has changed the segment list so we therefore keep
1117 1115 * locking as a writer. Since these opeartions should be rare
   1118 1116  * locking as a writer. Since these operations should be rare we
   1119 1117  * want to only lock as a writer when necessary.
1120 1118 if (writer || avl_numnodes(&as->a_wpage) != 0) {
1121 1119 AS_LOCK_ENTER(as, RW_WRITER);
1122 1120 } else {
1123 1121 AS_LOCK_ENTER(as, RW_READER);
1124 1122 }
1125 1123
1126 1124 as_clearwatchprot(as, raddr, rsize);
1127 1125 seg = as_segat(as, raddr);
1128 1126 if (seg == NULL) {
1129 1127 as_setwatch(as);
1130 1128 AS_LOCK_EXIT(as);
1131 1129 return (ENOMEM);
1132 1130 }
1133 1131
1134 1132 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1135 1133 if (raddr >= seg->s_base + seg->s_size) {
1136 1134 seg = AS_SEGNEXT(as, seg);
1137 1135 if (seg == NULL || raddr != seg->s_base) {
1138 1136 error = ENOMEM;
1139 1137 break;
1140 1138 }
1141 1139 }
1142 1140 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1143 1141 ssize = seg->s_base + seg->s_size - raddr;
1144 1142 else
1145 1143 ssize = rsize;
1146 1144 retry:
1147 1145 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1148 1146
1149 1147 if (error == IE_NOMEM) {
1150 1148 error = EAGAIN;
1151 1149 break;
1152 1150 }
1153 1151
1154 1152 if (error == IE_RETRY) {
1155 1153 AS_LOCK_EXIT(as);
1156 1154 writer = 1;
1157 1155 goto setprot_top;
1158 1156 }
1159 1157
1160 1158 if (error == EAGAIN) {
1161 1159 /*
1162 1160 * Make sure we have a_lock as writer.
1163 1161 */
1164 1162 if (writer == 0) {
1165 1163 AS_LOCK_EXIT(as);
1166 1164 writer = 1;
1167 1165 goto setprot_top;
1168 1166 }
1169 1167
1170 1168 /*
1171 1169 * Memory is currently locked. It must be unlocked
1172 1170 * before this operation can succeed through a retry.
1173 1171 * The possible reasons for locked memory and
1174 1172 * corresponding strategies for unlocking are:
1175 1173 * (1) Normal I/O
1176 1174 * wait for a signal that the I/O operation
1177 1175 * has completed and the memory is unlocked.
1178 1176 * (2) Asynchronous I/O
1179 1177 * The aio subsystem does not unlock pages when
1180 1178 * the I/O is completed. Those pages are unlocked
1181 1179 * when the application calls aiowait/aioerror.
1182 1180 * So, to prevent blocking forever, cv_broadcast()
1183 1181 * is done to wake up aio_cleanup_thread.
1184 1182 * Subsequently, segvn_reclaim will be called, and
1185 1183 * that will do AS_CLRUNMAPWAIT() and wake us up.
1186 1184 * (3) Long term page locking:
1187 1185 * Drivers intending to have pages locked for a
1188 1186 * period considerably longer than for normal I/O
1189 1187 * (essentially forever) may have registered for a
1190 1188 * callback so they may unlock these pages on
1191 1189 * request. This is needed to allow this operation
1192 1190 * to succeed. Each entry on the callback list is
1193 1191 * examined. If the event or address range pertains
1194 1192 * the callback is invoked (unless it already is in
1195 1193 * progress). The a_contents lock must be dropped
1196 1194 * before the callback, so only one callback can
1197 1195 * be done at a time. Go to the top and do more
1198 1196 * until zero is returned. If zero is returned,
1199 1197 * either there were no callbacks for this event
1200 1198 * or they were already in progress.
1201 1199 */
1202 1200 mutex_enter(&as->a_contents);
1203 1201 if (as->a_callbacks &&
1204 1202 (cb = as_find_callback(as, AS_SETPROT_EVENT,
1205 1203 seg->s_base, seg->s_size))) {
1206 1204 AS_LOCK_EXIT(as);
1207 1205 as_execute_callback(as, cb, AS_SETPROT_EVENT);
1208 1206 } else if (!AS_ISNOUNMAPWAIT(as)) {
1209 1207 if (AS_ISUNMAPWAIT(as) == 0)
1210 1208 cv_broadcast(&as->a_cv);
1211 1209 AS_SETUNMAPWAIT(as);
1212 1210 AS_LOCK_EXIT(as);
1213 1211 while (AS_ISUNMAPWAIT(as))
1214 1212 cv_wait(&as->a_cv, &as->a_contents);
1215 1213 } else {
1216 1214 /*
1217 1215 * We may have raced with
1218 1216 * segvn_reclaim()/segspt_reclaim(). In this
1219 1217 * case clean nounmapwait flag and retry since
1220 1218 * softlockcnt in this segment may be already
1221 1219 * 0. We don't drop as writer lock so our
1222 1220 * number of retries without sleeping should
1223 1221 * be very small. See segvn_reclaim() for
1224 1222 * more comments.
1225 1223 */
1226 1224 AS_CLRNOUNMAPWAIT(as);
1227 1225 mutex_exit(&as->a_contents);
1228 1226 goto retry;
1229 1227 }
1230 1228 mutex_exit(&as->a_contents);
1231 1229 goto setprot_top;
1232 1230 } else if (error != 0)
1233 1231 break;
1234 1232 }
1235 1233 if (error != 0) {
1236 1234 as_setwatch(as);
1237 1235 } else {
1238 1236 as_setwatchprot(as, saveraddr, saversize, prot);
1239 1237 }
1240 1238 AS_LOCK_EXIT(as);
1241 1239 return (error);
1242 1240 }
1243 1241
1244 1242 /*
1245 1243 * Check to make sure that the interval [addr, addr + size)
1246 1244 * in address space `as' has at least the specified protection.
1247 1245 * It is ok for the range to cross over several segments, as long
1248 1246 * as they are contiguous.
1249 1247 */
1250 1248 int
1251 1249 as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1252 1250 {
1253 1251 struct seg *seg;
1254 1252 size_t ssize;
1255 1253 caddr_t raddr; /* rounded down addr */
1256 1254 size_t rsize; /* rounded up size */
1257 1255 int error = 0;
1258 1256
1259 1257 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1260 1258 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1261 1259 (size_t)raddr;
1262 1260
1263 1261 if (raddr + rsize < raddr) /* check for wraparound */
1264 1262 return (ENOMEM);
1265 1263
1266 1264 /*
1267 1265 * This is ugly as sin...
1268 1266 * Normally, we only acquire the address space readers lock.
1269 1267 * However, if the address space has watchpoints present,
1270 1268 * we must acquire the writer lock on the address space for
1271 1269 * the benefit of as_clearwatchprot() and as_setwatchprot().
1272 1270 */
1273 1271 if (avl_numnodes(&as->a_wpage) != 0)
1274 1272 AS_LOCK_ENTER(as, RW_WRITER);
1275 1273 else
1276 1274 AS_LOCK_ENTER(as, RW_READER);
1277 1275 as_clearwatchprot(as, raddr, rsize);
1278 1276 seg = as_segat(as, raddr);
1279 1277 if (seg == NULL) {
1280 1278 as_setwatch(as);
1281 1279 AS_LOCK_EXIT(as);
1282 1280 return (ENOMEM);
1283 1281 }
1284 1282
1285 1283 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1286 1284 if (raddr >= seg->s_base + seg->s_size) {
1287 1285 seg = AS_SEGNEXT(as, seg);
1288 1286 if (seg == NULL || raddr != seg->s_base) {
1289 1287 error = ENOMEM;
1290 1288 break;
1291 1289 }
1292 1290 }
1293 1291 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1294 1292 ssize = seg->s_base + seg->s_size - raddr;
1295 1293 else
1296 1294 ssize = rsize;
1297 1295
1298 1296 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1299 1297 if (error != 0)
1300 1298 break;
1301 1299 }
1302 1300 as_setwatch(as);
1303 1301 AS_LOCK_EXIT(as);
1304 1302 return (error);
1305 1303 }
1306 1304
1307 1305 int
1308 1306 as_unmap(struct as *as, caddr_t addr, size_t size)
1309 1307 {
1310 1308 struct seg *seg, *seg_next;
1311 1309 struct as_callback *cb;
1312 1310 caddr_t raddr, eaddr;
1313 1311 size_t ssize, rsize = 0;
1314 1312 int err;
1315 1313
1316 1314 top:
1317 1315 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1318 1316 eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1319 1317 (uintptr_t)PAGEMASK);
1320 1318
1321 1319 AS_LOCK_ENTER(as, RW_WRITER);
1322 1320
1323 1321 as->a_updatedir = 1; /* inform /proc */
1324 1322 gethrestime(&as->a_updatetime);
1325 1323
1326 1324 /*
1327 1325 * Use as_findseg to find the first segment in the range, then
1328 1326 * step through the segments in order, following s_next.
1329 1327 */
1330 1328 as_clearwatchprot(as, raddr, eaddr - raddr);
1331 1329
1332 1330 for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1333 1331 if (eaddr <= seg->s_base)
1334 1332 break; /* eaddr was in a gap; all done */
1335 1333
1336 1334 /* this is implied by the test above */
1337 1335 ASSERT(raddr < eaddr);
1338 1336
1339 1337 if (raddr < seg->s_base)
1340 1338 raddr = seg->s_base; /* raddr was in a gap */
1341 1339
1342 1340 if (eaddr > (seg->s_base + seg->s_size))
1343 1341 ssize = seg->s_base + seg->s_size - raddr;
1344 1342 else
1345 1343 ssize = eaddr - raddr;
1346 1344
1347 1345 /*
1348 1346 * Save next segment pointer since seg can be
1349 1347 * destroyed during the segment unmap operation.
1350 1348 */
1351 1349 seg_next = AS_SEGNEXT(as, seg);
1352 1350
1353 1351 /*
1354 1352 * We didn't count /dev/null mappings, so ignore them here.
1355 1353 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
1356 1354 * we have to do this check here while we have seg.)
1357 1355 */
1358 1356 rsize = 0;
1359 1357 if (!SEG_IS_DEVNULL_MAPPING(seg) &&
1360 1358 !SEG_IS_PARTIAL_RESV(seg))
1361 1359 rsize = ssize;
1362 1360
1363 1361 retry:
1364 1362 err = SEGOP_UNMAP(seg, raddr, ssize);
1365 1363 if (err == EAGAIN) {
1366 1364 /*
1367 1365 * Memory is currently locked. It must be unlocked
1368 1366 * before this operation can succeed through a retry.
1369 1367 * The possible reasons for locked memory and
1370 1368 * corresponding strategies for unlocking are:
1371 1369 * (1) Normal I/O
1372 1370 * wait for a signal that the I/O operation
1373 1371 * has completed and the memory is unlocked.
1374 1372 * (2) Asynchronous I/O
1375 1373 * The aio subsystem does not unlock pages when
1376 1374 * the I/O is completed. Those pages are unlocked
1377 1375 * when the application calls aiowait/aioerror.
1378 1376 * So, to prevent blocking forever, cv_broadcast()
1379 1377 * is done to wake up aio_cleanup_thread.
1380 1378 * Subsequently, segvn_reclaim will be called, and
1381 1379 * that will do AS_CLRUNMAPWAIT() and wake us up.
1382 1380 * (3) Long term page locking:
1383 1381 * Drivers intending to have pages locked for a
1384 1382 * period considerably longer than for normal I/O
1385 1383 * (essentially forever) may have registered for a
1386 1384 * callback so they may unlock these pages on
1387 1385 * request. This is needed to allow this operation
1388 1386 * to succeed. Each entry on the callback list is
1389 1387 * examined. If the event or address range pertains
1390 1388 * the callback is invoked (unless it already is in
1391 1389 * progress). The a_contents lock must be dropped
1392 1390 * before the callback, so only one callback can
1393 1391 * be done at a time. Go to the top and do more
1394 1392 * until zero is returned. If zero is returned,
1395 1393 * either there were no callbacks for this event
1396 1394 * or they were already in progress.
1397 1395 */
1398 1396 mutex_enter(&as->a_contents);
1399 1397 if (as->a_callbacks &&
1400 1398 (cb = as_find_callback(as, AS_UNMAP_EVENT,
1401 1399 seg->s_base, seg->s_size))) {
1402 1400 AS_LOCK_EXIT(as);
1403 1401 as_execute_callback(as, cb, AS_UNMAP_EVENT);
1404 1402 } else if (!AS_ISNOUNMAPWAIT(as)) {
1405 1403 if (AS_ISUNMAPWAIT(as) == 0)
1406 1404 cv_broadcast(&as->a_cv);
1407 1405 AS_SETUNMAPWAIT(as);
1408 1406 AS_LOCK_EXIT(as);
1409 1407 while (AS_ISUNMAPWAIT(as))
1410 1408 cv_wait(&as->a_cv, &as->a_contents);
1411 1409 } else {
1412 1410 /*
1413 1411 * We may have raced with
1414 1412 * segvn_reclaim()/segspt_reclaim(). In this
1415 1413 * case clean nounmapwait flag and retry since
1416 1414 * softlockcnt in this segment may be already
1417 1415 * 0. We don't drop as writer lock so our
1418 1416 * number of retries without sleeping should
1419 1417 * be very small. See segvn_reclaim() for
1420 1418 * more comments.
1421 1419 */
1422 1420 AS_CLRNOUNMAPWAIT(as);
1423 1421 mutex_exit(&as->a_contents);
1424 1422 goto retry;
1425 1423 }
1426 1424 mutex_exit(&as->a_contents);
1427 1425 goto top;
1428 1426 } else if (err == IE_RETRY) {
1429 1427 AS_LOCK_EXIT(as);
1430 1428 goto top;
1431 1429 } else if (err) {
1432 1430 as_setwatch(as);
1433 1431 AS_LOCK_EXIT(as);
1434 1432 return (-1);
1435 1433 }
1436 1434
1437 1435 as->a_size -= ssize;
1438 1436 if (rsize)
1439 1437 as->a_resvsize -= rsize;
1440 1438 raddr += ssize;
1441 1439 }
1442 1440 AS_LOCK_EXIT(as);
1443 1441 return (0);
1444 1442 }
1445 1443
1446 1444 static int
1447 1445 as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1448 1446 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1449 1447 {
1450 1448 uint_t szc;
1451 1449 uint_t nszc;
1452 1450 int error;
1453 1451 caddr_t a;
1454 1452 caddr_t eaddr;
1455 1453 size_t segsize;
1456 1454 struct seg *seg;
1457 1455 size_t pgsz;
1458 1456 int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1459 1457 uint_t save_szcvec;
1460 1458
1461 1459 ASSERT(AS_WRITE_HELD(as));
1462 1460 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1463 1461 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1464 1462 ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1465 1463 if (!do_off) {
1466 1464 vn_a->offset = 0;
1467 1465 }
1468 1466
1469 1467 if (szcvec <= 1) {
1470 1468 seg = seg_alloc(as, addr, size);
1471 1469 if (seg == NULL) {
1472 1470 return (ENOMEM);
1473 1471 }
1474 1472 vn_a->szc = 0;
1475 1473 error = (*crfp)(seg, vn_a);
1476 1474 if (error != 0) {
1477 1475 seg_free(seg);
1478 1476 } else {
1479 1477 as->a_size += size;
1480 1478 as->a_resvsize += size;
1481 1479 }
1482 1480 return (error);
1483 1481 }
1484 1482
1485 1483 eaddr = addr + size;
1486 1484 save_szcvec = szcvec;
1487 1485 szcvec >>= 1;
1488 1486 szc = 0;
1489 1487 nszc = 0;
1490 1488 while (szcvec) {
1491 1489 if ((szcvec & 0x1) == 0) {
1492 1490 nszc++;
1493 1491 szcvec >>= 1;
1494 1492 continue;
1495 1493 }
1496 1494 nszc++;
1497 1495 pgsz = page_get_pagesize(nszc);
1498 1496 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
1499 1497 if (a != addr) {
1500 1498 ASSERT(a < eaddr);
1501 1499 segsize = a - addr;
1502 1500 seg = seg_alloc(as, addr, segsize);
1503 1501 if (seg == NULL) {
1504 1502 return (ENOMEM);
1505 1503 }
1506 1504 vn_a->szc = szc;
1507 1505 error = (*crfp)(seg, vn_a);
1508 1506 if (error != 0) {
1509 1507 seg_free(seg);
1510 1508 return (error);
1511 1509 }
1512 1510 as->a_size += segsize;
1513 1511 as->a_resvsize += segsize;
1514 1512 *segcreated = 1;
1515 1513 if (do_off) {
1516 1514 vn_a->offset += segsize;
1517 1515 }
1518 1516 addr = a;
1519 1517 }
1520 1518 szc = nszc;
1521 1519 szcvec >>= 1;
1522 1520 }
1523 1521
1524 1522 ASSERT(addr < eaddr);
1525 1523 szcvec = save_szcvec | 1; /* add 8K pages */
1526 1524 while (szcvec) {
1527 1525 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
1528 1526 ASSERT(a >= addr);
1529 1527 if (a != addr) {
1530 1528 segsize = a - addr;
1531 1529 seg = seg_alloc(as, addr, segsize);
1532 1530 if (seg == NULL) {
1533 1531 return (ENOMEM);
1534 1532 }
1535 1533 vn_a->szc = szc;
1536 1534 error = (*crfp)(seg, vn_a);
1537 1535 if (error != 0) {
1538 1536 seg_free(seg);
1539 1537 return (error);
1540 1538 }
1541 1539 as->a_size += segsize;
1542 1540 as->a_resvsize += segsize;
1543 1541 *segcreated = 1;
1544 1542 if (do_off) {
1545 1543 vn_a->offset += segsize;
1546 1544 }
1547 1545 addr = a;
1548 1546 }
1549 1547 szcvec &= ~(1 << szc);
1550 1548 if (szcvec) {
1551 1549 szc = highbit(szcvec) - 1;
1552 1550 pgsz = page_get_pagesize(szc);
1553 1551 }
1554 1552 }
1555 1553 ASSERT(addr == eaddr);
1556 1554
1557 1555 return (0);
1558 1556 }
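Reviewer note: the carving above is easier to follow with a concrete, hypothetical page-size vector. Assume szcvec = 0x3 (szc 0 = 4 KB, szc 1 = 2 MB), addr = 1 MB and size = 6 MB, so eaddr = 7 MB. The first loop rounds addr up to the next 2 MB boundary and emits a head segment [1 MB, 2 MB) at szc 0; the second loop aligns eaddr down to 2 MB and emits the body [2 MB, 6 MB) at szc 1, then drops to szc 0 for the tail [6 MB, 7 MB). *segcreated is set as soon as any piece is created, so as_map_locked() knows to as_unmap() the partial result if a later piece fails.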
1559 1557
1560 1558 static int
1561 1559 as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1562 1560 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1563 1561 {
1564 1562 uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1565 1563 int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1566 1564 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1567 1565 type, 0);
1568 1566 int error;
1569 1567 struct seg *seg;
1570 1568 struct vattr va;
1571 1569 u_offset_t eoff;
1572 1570 size_t save_size = 0;
1573 1571 extern size_t textrepl_size_thresh;
1574 1572
1575 1573 ASSERT(AS_WRITE_HELD(as));
1576 1574 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1577 1575 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1578 1576 ASSERT(vn_a->vp != NULL);
1579 1577 ASSERT(vn_a->amp == NULL);
1580 1578
1581 1579 again:
1582 1580 if (szcvec <= 1) {
1583 1581 seg = seg_alloc(as, addr, size);
1584 1582 if (seg == NULL) {
1585 1583 return (ENOMEM);
1586 1584 }
1587 1585 vn_a->szc = 0;
1588 1586 error = (*crfp)(seg, vn_a);
1589 1587 if (error != 0) {
1590 1588 seg_free(seg);
1591 1589 } else {
1592 1590 as->a_size += size;
1593 1591 as->a_resvsize += size;
1594 1592 }
1595 1593 return (error);
1596 1594 }
1597 1595
1598 1596 va.va_mask = AT_SIZE;
1599 1597 if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
1600 1598 szcvec = 0;
1601 1599 goto again;
1602 1600 }
1603 1601 eoff = vn_a->offset & PAGEMASK;
1604 1602 if (eoff >= va.va_size) {
1605 1603 szcvec = 0;
1606 1604 goto again;
1607 1605 }
1608 1606 eoff += size;
1609 1607 if (btopr(va.va_size) < btopr(eoff)) {
1610 1608 save_size = size;
1611 1609 size = va.va_size - (vn_a->offset & PAGEMASK);
1612 1610 size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
1613 1611 szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1614 1612 type, 0);
1615 1613 if (szcvec <= 1) {
1616 1614 size = save_size;
1617 1615 goto again;
1618 1616 }
1619 1617 }
1620 1618
1621 1619 if (size > textrepl_size_thresh) {
1622 1620 vn_a->flags |= _MAP_TEXTREPL;
1623 1621 }
1624 1622 error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
1625 1623 segcreated);
1626 1624 if (error != 0) {
1627 1625 return (error);
1628 1626 }
1629 1627 if (save_size) {
1630 1628 addr += size;
1631 1629 size = save_size - size;
1632 1630 szcvec = 0;
1633 1631 goto again;
1634 1632 }
1635 1633 return (0);
1636 1634 }
1637 1635
1638 1636 /*
1639 1637 * as_map_ansegs: shared or private anonymous memory. Note that the flags
   1640 1638  * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
1641 1639 */
1642 1640 static int
1643 1641 as_map_ansegs(struct as *as, caddr_t addr, size_t size,
1644 1642 int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1645 1643 {
1646 1644 uint_t szcvec;
1647 1645 uchar_t type;
1648 1646
1649 1647 ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1650 1648 if (vn_a->type == MAP_SHARED) {
1651 1649 type = MAPPGSZC_SHM;
1652 1650 } else if (vn_a->type == MAP_PRIVATE) {
1653 1651 if (vn_a->szc == AS_MAP_HEAP) {
1654 1652 type = MAPPGSZC_HEAP;
1655 1653 } else if (vn_a->szc == AS_MAP_STACK) {
1656 1654 type = MAPPGSZC_STACK;
1657 1655 } else {
1658 1656 type = MAPPGSZC_PRIVM;
1659 1657 }
1660 1658 }
1661 1659 szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1662 1660 (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1663 1661 (vn_a->flags & MAP_TEXT), type, 0);
1664 1662 ASSERT(AS_WRITE_HELD(as));
1665 1663 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1666 1664 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1667 1665 ASSERT(vn_a->vp == NULL);
1668 1666
1669 1667 return (as_map_segvn_segs(as, addr, size, szcvec,
1670 1668 crfp, vn_a, segcreated));
1671 1669 }
1672 1670
1673 1671 int
1674 1672 as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1675 1673 {
1676 1674 AS_LOCK_ENTER(as, RW_WRITER);
1677 1675 return (as_map_locked(as, addr, size, crfp, argsp));
1678 1676 }
1679 1677
1680 1678 int
1681 1679 as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1682 1680 void *argsp)
1683 1681 {
1684 1682 struct seg *seg = NULL;
1685 1683 caddr_t raddr; /* rounded down addr */
1686 1684 size_t rsize; /* rounded up size */
1687 1685 int error;
1688 1686 int unmap = 0;
1689 1687 /*
1690 1688 * The use of a_proc is preferred to handle the case where curproc is
1691 1689 * a door_call server and is allocating memory in the client's (a_proc)
1692 1690 * address space.
1693 1691 * When creating a shared memory segment a_proc will be NULL so we
1694 1692 	 * fall back to curproc in that case.
1695 1693 */
1696 1694 struct proc *p = (as->a_proc == NULL) ? curproc : as->a_proc;
1697 1695 struct segvn_crargs crargs;
1698 1696
1699 1697 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1700 1698 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1701 1699 (size_t)raddr;
1702 1700
1703 1701 /*
1704 1702 * check for wrap around
1705 1703 */
1706 1704 if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1707 1705 AS_LOCK_EXIT(as);
1708 1706 return (ENOMEM);
1709 1707 }
1710 1708
1711 1709 as->a_updatedir = 1; /* inform /proc */
1712 1710 gethrestime(&as->a_updatetime);
1713 1711
1714 - if (as != &kas) {
1715 - if (as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1716 - AS_LOCK_EXIT(as);
1712 + if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1713 + AS_LOCK_EXIT(as);
1717 1714
1718 - (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM],
1719 - p->p_rctls, p, RCA_UNSAFE_ALL);
1720 - return (ENOMEM);
1721 - }
1715 + (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1716 + RCA_UNSAFE_ALL);
1722 1717
1723 - /*
1724 - * Keep the number of segments in a userspace AS constrained to
1725 - * a reasonable limit. Linux enforces a value slightly less
1726 - * than 64k in order to avoid ELF limits if/when a process
1727 - * dumps core. While SunOS avoids that specific problem with
1728 - * other tricks, the limit is still valuable to keep kernel
1729 - * memory consumption in check.
1730 - */
1731 - if (avl_numnodes(&as->a_segtree) >= as_user_seg_limit) {
1732 - AS_LOCK_EXIT(as);
1733 - atomic_inc_32(&p->p_zone->zone_mfseglim);
1734 - return (ENOMEM);
1735 - }
1718 + return (ENOMEM);
1736 1719 }
1737 1720
1738 1721 if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1739 1722 crargs = *(struct segvn_crargs *)argsp;
1740 1723 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1741 1724 if (error != 0) {
1742 1725 AS_LOCK_EXIT(as);
1743 1726 if (unmap) {
1744 1727 (void) as_unmap(as, addr, size);
1745 1728 }
1746 1729 return (error);
1747 1730 }
1748 1731 } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1749 1732 crargs = *(struct segvn_crargs *)argsp;
1750 1733 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1751 1734 if (error != 0) {
1752 1735 AS_LOCK_EXIT(as);
1753 1736 if (unmap) {
1754 1737 (void) as_unmap(as, addr, size);
1755 1738 }
1756 1739 return (error);
1757 1740 }
1758 1741 } else {
1759 1742 seg = seg_alloc(as, addr, size);
1760 1743 if (seg == NULL) {
1761 1744 AS_LOCK_EXIT(as);
1762 1745 return (ENOMEM);
1763 1746 }
1764 1747
1765 1748 error = (*crfp)(seg, argsp);
1766 1749 if (error != 0) {
1767 1750 seg_free(seg);
1768 1751 AS_LOCK_EXIT(as);
1769 1752 return (error);
1770 1753 }
1771 1754 /*
1772 1755 * Add size now so as_unmap will work if as_ctl fails.
1773 1756 */
1774 1757 as->a_size += rsize;
1775 1758 as->a_resvsize += rsize;
1776 1759 }
1777 1760
1778 1761 as_setwatch(as);
1779 1762
1780 1763 /*
1781 1764 * If the address space is locked,
1782 1765 * establish memory locks for the new segment.
1783 1766 */
1784 1767 mutex_enter(&as->a_contents);
1785 1768 if (AS_ISPGLCK(as)) {
1786 1769 mutex_exit(&as->a_contents);
1787 1770 AS_LOCK_EXIT(as);
1788 1771 error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1789 1772 if (error != 0)
1790 1773 (void) as_unmap(as, addr, size);
1791 1774 } else {
1792 1775 mutex_exit(&as->a_contents);
1793 1776 AS_LOCK_EXIT(as);
1794 1777 }
1795 1778 return (error);
1796 1779 }
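
As a rough illustration of the caller side, the sketch below maps a range with as_map() under the address-space range lock, the way mmap-style paths do. The callback name my_seg_create and its argument pointer are hypothetical placeholders; real callers pass a segment driver's create routine (such as the segvn one checked for above) and its crargs.

/*
 * Hedged sketch (not part of this file): map [addr, addr + len) into
 * "as" using a caller-supplied segment create routine. The names
 * my_seg_create/my_args are placeholders.
 */
static int
example_map_range(struct as *as, caddr_t addr, size_t len,
    int (*my_seg_create)(), void *my_args)
{
	int error;

	as_rangelock(as);	/* serialize with other mapping changes */
	error = as_map(as, addr, len, my_seg_create, my_args);
	as_rangeunlock(as);

	return (error);
}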
1797 1780
1798 1781
1799 1782 /*
1800 1783 * Delete all segments in the address space marked with S_PURGE.
1801 1784 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1802 1785 * These segments are deleted as a first step before calls to as_gap(), so
1803 1786 * that they don't affect mmap() or shmat().
1804 1787 */
1805 1788 void
1806 1789 as_purge(struct as *as)
1807 1790 {
1808 1791 struct seg *seg;
1809 1792 struct seg *next_seg;
1810 1793
1811 1794 /*
1812 1795 	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
1813 1796 * no need to grab a_contents mutex for this check
1814 1797 */
1815 1798 if ((as->a_flags & AS_NEEDSPURGE) == 0)
1816 1799 return;
1817 1800
1818 1801 AS_LOCK_ENTER(as, RW_WRITER);
1819 1802 next_seg = NULL;
1820 1803 seg = AS_SEGFIRST(as);
1821 1804 while (seg != NULL) {
1822 1805 next_seg = AS_SEGNEXT(as, seg);
1823 1806 if (seg->s_flags & S_PURGE)
1824 1807 SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1825 1808 seg = next_seg;
1826 1809 }
1827 1810 AS_LOCK_EXIT(as);
1828 1811
1829 1812 mutex_enter(&as->a_contents);
1830 1813 as->a_flags &= ~AS_NEEDSPURGE;
1831 1814 mutex_exit(&as->a_contents);
1832 1815 }
1833 1816
1834 1817 /*
1835 1818 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1836 1819 * range of addresses at least "minlen" long, where the base of the range is
1837 1820 * at "off" phase from an "align" boundary and there is space for a
1838 1821 	 * "redzone"-sized redzone on either side of the range. Thus,
1839 1822 * if align was 4M and off was 16k, the user wants a hole which will start
1840 1823 * 16k into a 4M page.
1841 1824 *
1842 1825 * If flags specifies AH_HI, the hole will have the highest possible address
1843 1826 * in the range. We use the as->a_lastgap field to figure out where to
1844 1827 * start looking for a gap.
1845 1828 *
1846 1829 * Otherwise, the gap will have the lowest possible address.
1847 1830 *
1848 1831 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
1849 1832 *
1850 1833 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
1851 1834 * the hole that is within range, and 0 is returned. On failure, -1 is returned.
1852 1835 *
1853 1836 * NOTE: This routine is not correct when base+len overflows caddr_t.
1854 1837 */
1855 1838 int
1856 1839 as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
1857 1840 uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
1858 1841 {
1859 1842 caddr_t lobound = *basep;
1860 1843 caddr_t hibound = lobound + *lenp;
1861 1844 struct seg *lseg, *hseg;
1862 1845 caddr_t lo, hi;
1863 1846 int forward;
1864 1847 caddr_t save_base;
1865 1848 size_t save_len;
1866 1849 size_t save_minlen;
1867 1850 size_t save_redzone;
1868 1851 int fast_path = 1;
1869 1852
1870 1853 save_base = *basep;
1871 1854 save_len = *lenp;
1872 1855 save_minlen = minlen;
1873 1856 save_redzone = redzone;
1874 1857
1875 1858 /*
1876 1859 * For the first pass/fast_path, just add align and redzone into
1877 1860 * minlen since if we get an allocation, we can guarantee that it
1878 1861 * will fit the alignment and redzone requested.
1879 1862 * This increases the chance that hibound will be adjusted to
1880 1863 * a_lastgap->s_base which will likely allow us to find an
1881 1864 * acceptable hole in the address space quicker.
1882 1865 * If we can't find a hole with this fast_path, then we look for
1883 1866 * smaller holes in which the alignment and offset may allow
1884 1867 * the allocation to fit.
1885 1868 */
1886 1869 minlen += align;
1887 1870 minlen += 2 * redzone;
1888 1871 redzone = 0;
1889 1872
1890 1873 AS_LOCK_ENTER(as, RW_READER);
1891 1874 if (AS_SEGFIRST(as) == NULL) {
1892 1875 if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1893 1876 align, redzone, off)) {
1894 1877 AS_LOCK_EXIT(as);
1895 1878 return (0);
1896 1879 } else {
1897 1880 AS_LOCK_EXIT(as);
1898 1881 *basep = save_base;
1899 1882 *lenp = save_len;
1900 1883 return (-1);
1901 1884 }
1902 1885 }
1903 1886
1904 1887 retry:
1905 1888 /*
1906 1889 * Set up to iterate over all the inter-segment holes in the given
1907 1890 * direction. lseg is NULL for the lowest-addressed hole and hseg is
1908 1891 * NULL for the highest-addressed hole. If moving backwards, we reset
1909 1892 * sseg to denote the highest-addressed segment.
1910 1893 */
1911 1894 forward = (flags & AH_DIR) == AH_LO;
1912 1895 if (forward) {
1913 1896 hseg = as_findseg(as, lobound, 1);
1914 1897 lseg = AS_SEGPREV(as, hseg);
1915 1898 } else {
1916 1899
1917 1900 /*
1918 1901 * If allocating at least as much as the last allocation,
1919 1902 * use a_lastgap's base as a better estimate of hibound.
1920 1903 */
1921 1904 if (as->a_lastgap &&
1922 1905 minlen >= as->a_lastgap->s_size &&
1923 1906 hibound >= as->a_lastgap->s_base)
1924 1907 hibound = as->a_lastgap->s_base;
1925 1908
1926 1909 hseg = as_findseg(as, hibound, 1);
1927 1910 if (hseg->s_base + hseg->s_size < hibound) {
1928 1911 lseg = hseg;
1929 1912 hseg = NULL;
1930 1913 } else {
1931 1914 lseg = AS_SEGPREV(as, hseg);
1932 1915 }
1933 1916 }
1934 1917
1935 1918 for (;;) {
1936 1919 /*
1937 1920 * Set lo and hi to the hole's boundaries. (We should really
1938 1921 * use MAXADDR in place of hibound in the expression below,
1939 1922 * but can't express it easily; using hibound in its place is
1940 1923 * harmless.)
1941 1924 */
1942 1925 lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
1943 1926 hi = (hseg == NULL) ? hibound : hseg->s_base;
1944 1927 /*
1945 1928 * If the iteration has moved past the interval from lobound
1946 1929 * to hibound it's pointless to continue.
1947 1930 */
1948 1931 if ((forward && lo > hibound) || (!forward && hi < lobound))
1949 1932 break;
1950 1933 else if (lo > hibound || hi < lobound)
1951 1934 goto cont;
1952 1935 /*
1953 1936 * Candidate hole lies at least partially within the allowable
1954 1937 * range. Restrict it to fall completely within that range,
1955 1938 * i.e., to [max(lo, lobound), min(hi, hibound)].
1956 1939 */
1957 1940 if (lo < lobound)
1958 1941 lo = lobound;
1959 1942 if (hi > hibound)
1960 1943 hi = hibound;
1961 1944 /*
1962 1945 * Verify that the candidate hole is big enough and meets
1963 1946 * hardware constraints. If the hole is too small, no need
1964 1947 * to do the further checks since they will fail.
1965 1948 */
1966 1949 *basep = lo;
1967 1950 *lenp = hi - lo;
1968 1951 if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
1969 1952 minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
1970 1953 ((flags & AH_CONTAIN) == 0 ||
1971 1954 (*basep <= addr && *basep + *lenp > addr))) {
1972 1955 if (!forward)
1973 1956 as->a_lastgap = hseg;
1974 1957 if (hseg != NULL)
1975 1958 as->a_lastgaphl = hseg;
1976 1959 else
1977 1960 as->a_lastgaphl = lseg;
1978 1961 AS_LOCK_EXIT(as);
1979 1962 return (0);
1980 1963 }
1981 1964 cont:
1982 1965 /*
1983 1966 * Move to the next hole.
1984 1967 */
1985 1968 if (forward) {
1986 1969 lseg = hseg;
1987 1970 if (lseg == NULL)
1988 1971 break;
1989 1972 hseg = AS_SEGNEXT(as, hseg);
1990 1973 } else {
1991 1974 hseg = lseg;
1992 1975 if (hseg == NULL)
1993 1976 break;
1994 1977 lseg = AS_SEGPREV(as, lseg);
1995 1978 }
1996 1979 }
1997 1980 if (fast_path && (align != 0 || save_redzone != 0)) {
1998 1981 fast_path = 0;
1999 1982 minlen = save_minlen;
2000 1983 redzone = save_redzone;
2001 1984 goto retry;
2002 1985 }
2003 1986 *basep = save_base;
2004 1987 *lenp = save_len;
2005 1988 AS_LOCK_EXIT(as);
2006 1989 return (-1);
2007 1990 }
2008 1991
2009 1992 /*
2010 1993 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2011 1994 *
2012 1995 * If flags specifies AH_HI, the hole will have the highest possible address
2013 1996 * in the range. We use the as->a_lastgap field to figure out where to
2014 1997 * start looking for a gap.
2015 1998 *
2016 1999 * Otherwise, the gap will have the lowest possible address.
2017 2000 *
2018 2001 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2019 2002 *
2020 2003 * If an adequate hole is found, base and len are set to reflect the part of
2021 2004 * the hole that is within range, and 0 is returned, otherwise,
2022 2005 * -1 is returned.
2023 2006 *
2024 2007 * NOTE: This routine is not correct when base+len overflows caddr_t.
2025 2008 */
2026 2009 int
2027 2010 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2028 2011 caddr_t addr)
2029 2012 {
2030 2013
2031 2014 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2032 2015 }
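
For illustration, a minimal sketch of an as_gap() caller follows; the search bounds and the userlimit parameter are assumptions for the example, and the highest suitable address is taken from the returned hole the way AH_HI callers typically do.

/*
 * Hedged sketch (not part of this file): find the highest hole of at
 * least "len" bytes in [0, userlimit) and return its top as *addrp.
 */
static int
example_find_hole(struct as *as, size_t len, caddr_t userlimit,
    caddr_t *addrp)
{
	caddr_t base = NULL;
	size_t slen = (size_t)userlimit;

	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == -1)
		return (ENOMEM);

	*addrp = base + slen - len;	/* place at the top of the hole */
	return (0);
}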
2033 2016
2034 2017 /*
2035 2018 * Return the next range within [base, base + len) that is backed
2036 2019 * with "real memory". Skip holes and non-seg_vn segments.
2037 2020 * We're lazy and only return one segment at a time.
2038 2021 */
2039 2022 int
2040 2023 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2041 2024 {
2042 2025 extern struct seg_ops segspt_shmops; /* needs a header file */
2043 2026 struct seg *seg;
2044 2027 caddr_t addr, eaddr;
2045 2028 caddr_t segend;
2046 2029
2047 2030 AS_LOCK_ENTER(as, RW_READER);
2048 2031
2049 2032 addr = *basep;
2050 2033 eaddr = addr + *lenp;
2051 2034
2052 2035 seg = as_findseg(as, addr, 0);
2053 2036 if (seg != NULL)
2054 2037 addr = MAX(seg->s_base, addr);
2055 2038
2056 2039 for (;;) {
2057 2040 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2058 2041 AS_LOCK_EXIT(as);
2059 2042 return (EINVAL);
2060 2043 }
2061 2044
2062 2045 if (seg->s_ops == &segvn_ops) {
2063 2046 segend = seg->s_base + seg->s_size;
2064 2047 break;
2065 2048 }
2066 2049
2067 2050 /*
2068 2051 * We do ISM by looking into the private data
2069 2052 * to determine the real size of the segment.
2070 2053 */
2071 2054 if (seg->s_ops == &segspt_shmops) {
2072 2055 segend = seg->s_base + spt_realsize(seg);
2073 2056 if (addr < segend)
2074 2057 break;
2075 2058 }
2076 2059
2077 2060 seg = AS_SEGNEXT(as, seg);
2078 2061
2079 2062 if (seg != NULL)
2080 2063 addr = seg->s_base;
2081 2064 }
2082 2065
2083 2066 *basep = addr;
2084 2067
2085 2068 if (segend > eaddr)
2086 2069 *lenp = eaddr - addr;
2087 2070 else
2088 2071 *lenp = segend - addr;
2089 2072
2090 2073 AS_LOCK_EXIT(as);
2091 2074 return (0);
2092 2075 }
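
Since as_memory() hands back only one backed range per call, a caller typically loops, advancing past each returned range; a minimal sketch (the processing step is a placeholder):

/*
 * Hedged sketch (not part of this file): walk every range inside
 * [addr, addr + len) that as_memory() reports as backed by real memory.
 */
static void
example_walk_memory(struct as *as, caddr_t addr, size_t len)
{
	caddr_t base = addr;
	size_t rlen = len;

	while (as_memory(as, &base, &rlen) == 0) {
		/* [base, base + rlen) is backed; process it here */

		base += rlen;			/* advance past this range */
		if (base >= addr + len)
			break;
		rlen = (addr + len) - base;	/* rescan the remainder */
	}
}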
2093 2076
2094 2077 /*
2095 2078 * Swap the pages associated with the address space as out to
2096 2079 * secondary storage, returning the number of bytes actually
2097 2080 * swapped.
2098 2081 *
2099 2082 * The value returned is intended to correlate well with the process's
2100 2083 * memory requirements. Its usefulness for this purpose depends on
2101 2084 * how well the segment-level routines do at returning accurate
2102 2085 * information.
2103 2086 */
2104 2087 size_t
2105 2088 as_swapout(struct as *as)
2106 2089 {
2107 2090 struct seg *seg;
2108 2091 size_t swpcnt = 0;
2109 2092
2110 2093 /*
2111 2094 * Kernel-only processes have given up their address
2112 2095 * spaces. Of course, we shouldn't be attempting to
2113 2096 * swap out such processes in the first place...
2114 2097 */
2115 2098 if (as == NULL)
2116 2099 return (0);
2117 2100
2118 2101 AS_LOCK_ENTER(as, RW_READER);
2119 2102
2120 2103 /*
2121 2104 * Free all mapping resources associated with the address
2122 2105 * space. The segment-level swapout routines capitalize
2123 2106 	 * on this unmapping by scavenging pages that have become
2124 2107 * unmapped here.
2125 2108 */
2126 2109 hat_swapout(as->a_hat);
2127 2110
2128 2111 /*
2129 2112 * Call the swapout routines of all segments in the address
2130 2113 * space to do the actual work, accumulating the amount of
2131 2114 * space reclaimed.
2132 2115 */
2133 2116 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2134 2117 struct seg_ops *ov = seg->s_ops;
2135 2118
2136 2119 /*
2137 2120 * We have to check to see if the seg has
2138 2121 * an ops vector because the seg may have
2139 2122 * been in the middle of being set up when
2140 2123 * the process was picked for swapout.
2141 2124 */
2142 2125 if ((ov != NULL) && (ov->swapout != NULL))
2143 2126 swpcnt += SEGOP_SWAPOUT(seg);
2144 2127 }
2145 2128 AS_LOCK_EXIT(as);
2146 2129 return (swpcnt);
2147 2130 }
2148 2131
2149 2132 /*
2150 2133 * Determine whether data from the mappings in interval [addr, addr + size)
2151 2134 * are in the primary memory (core) cache.
2152 2135 */
2153 2136 int
2154 2137 as_incore(struct as *as, caddr_t addr,
2155 2138 size_t size, char *vec, size_t *sizep)
2156 2139 {
2157 2140 struct seg *seg;
2158 2141 size_t ssize;
2159 2142 caddr_t raddr; /* rounded down addr */
2160 2143 size_t rsize; /* rounded up size */
2161 2144 size_t isize; /* iteration size */
2162 2145 int error = 0; /* result, assume success */
2163 2146
2164 2147 *sizep = 0;
2165 2148 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2166 2149 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2167 2150 (size_t)raddr;
2168 2151
2169 2152 if (raddr + rsize < raddr) /* check for wraparound */
2170 2153 return (ENOMEM);
2171 2154
2172 2155 AS_LOCK_ENTER(as, RW_READER);
2173 2156 seg = as_segat(as, raddr);
2174 2157 if (seg == NULL) {
2175 2158 AS_LOCK_EXIT(as);
2176 2159 return (-1);
2177 2160 }
2178 2161
2179 2162 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2180 2163 if (raddr >= seg->s_base + seg->s_size) {
2181 2164 seg = AS_SEGNEXT(as, seg);
2182 2165 if (seg == NULL || raddr != seg->s_base) {
2183 2166 error = -1;
2184 2167 break;
2185 2168 }
2186 2169 }
2187 2170 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2188 2171 ssize = seg->s_base + seg->s_size - raddr;
2189 2172 else
2190 2173 ssize = rsize;
2191 2174 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2192 2175 if (isize != ssize) {
2193 2176 error = -1;
2194 2177 break;
2195 2178 }
2196 2179 vec += btopr(ssize);
2197 2180 }
2198 2181 AS_LOCK_EXIT(as);
2199 2182 return (error);
2200 2183 }
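
A residency query sized the way the loop above consumes it needs one vector byte per page of the range; the sketch below assumes a page-aligned addr/len and uses kmem_zalloc() purely for illustration.

/*
 * Hedged sketch (not part of this file): ask how much of a page-aligned
 * range is in core. One byte of "vec" is consumed per page, matching
 * the btopr(ssize) advance in as_incore().
 */
static int
example_incore(struct as *as, caddr_t addr, size_t len)
{
	size_t npages = btopr(len);
	char *vec = kmem_zalloc(npages, KM_SLEEP);
	size_t got;
	int rc;

	rc = as_incore(as, addr, len, vec, &got);
	/* rc == 0: "got" bytes described; rc == -1: a hole was hit */

	kmem_free(vec, npages);
	return (rc);
}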
2201 2184
2202 2185 static void
2203 2186 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2204 2187 ulong_t *bitmap, size_t position, size_t npages)
2205 2188 {
2206 2189 caddr_t range_start;
2207 2190 size_t pos1 = position;
2208 2191 size_t pos2;
2209 2192 size_t size;
2210 2193 size_t end_pos = npages + position;
2211 2194
2212 2195 while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2213 2196 size = ptob((pos2 - pos1));
2214 2197 range_start = (caddr_t)((uintptr_t)addr +
2215 2198 ptob(pos1 - position));
2216 2199
2217 2200 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2218 2201 (ulong_t *)NULL, (size_t)NULL);
2219 2202 pos1 = pos2;
2220 2203 }
2221 2204 }
2222 2205
2223 2206 static void
2224 2207 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
2225 2208 caddr_t raddr, size_t rsize)
2226 2209 {
2227 2210 struct seg *seg = as_segat(as, raddr);
2228 2211 size_t ssize;
2229 2212
2230 2213 while (rsize != 0) {
2231 2214 if (raddr >= seg->s_base + seg->s_size)
2232 2215 seg = AS_SEGNEXT(as, seg);
2233 2216
2234 2217 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2235 2218 ssize = seg->s_base + seg->s_size - raddr;
2236 2219 else
2237 2220 ssize = rsize;
2238 2221
2239 2222 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize));
2240 2223
2241 2224 rsize -= ssize;
2242 2225 raddr += ssize;
2243 2226 }
2244 2227 }
2245 2228
2246 2229 /*
2247 2230 * Cache control operations over the interval [addr, addr + size) in
2248 2231 * address space "as".
2249 2232 */
2250 2233 /*ARGSUSED*/
2251 2234 int
2252 2235 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2253 2236 uintptr_t arg, ulong_t *lock_map, size_t pos)
2254 2237 {
2255 2238 struct seg *seg; /* working segment */
2256 2239 caddr_t raddr; /* rounded down addr */
2257 2240 caddr_t initraddr; /* saved initial rounded down addr */
2258 2241 size_t rsize; /* rounded up size */
2259 2242 size_t initrsize; /* saved initial rounded up size */
2260 2243 size_t ssize; /* size of seg */
2261 2244 int error = 0; /* result */
2262 2245 size_t mlock_size; /* size of bitmap */
2263 2246 ulong_t *mlock_map; /* pointer to bitmap used */
2264 2247 /* to represent the locked */
2265 2248 /* pages. */
2266 2249 retry:
2267 2250 if (error == IE_RETRY)
2268 2251 AS_LOCK_ENTER(as, RW_WRITER);
2269 2252 else
2270 2253 AS_LOCK_ENTER(as, RW_READER);
2271 2254
2272 2255 /*
2273 2256 * If these are address space lock/unlock operations, loop over
2274 2257 * all segments in the address space, as appropriate.
2275 2258 */
2276 2259 if (func == MC_LOCKAS) {
2277 2260 size_t npages, idx;
2278 2261 size_t rlen = 0; /* rounded as length */
2279 2262
2280 2263 idx = pos;
2281 2264
2282 2265 if (arg & MCL_FUTURE) {
2283 2266 mutex_enter(&as->a_contents);
2284 2267 AS_SETPGLCK(as);
2285 2268 mutex_exit(&as->a_contents);
2286 2269 }
2287 2270 if ((arg & MCL_CURRENT) == 0) {
2288 2271 AS_LOCK_EXIT(as);
2289 2272 return (0);
2290 2273 }
2291 2274
2292 2275 seg = AS_SEGFIRST(as);
2293 2276 if (seg == NULL) {
2294 2277 AS_LOCK_EXIT(as);
2295 2278 return (0);
2296 2279 }
2297 2280
2298 2281 do {
2299 2282 raddr = (caddr_t)((uintptr_t)seg->s_base &
2300 2283 (uintptr_t)PAGEMASK);
2301 2284 rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2302 2285 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2303 2286 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2304 2287
2305 2288 mlock_size = BT_BITOUL(btopr(rlen));
2306 2289 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2307 2290 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2308 2291 AS_LOCK_EXIT(as);
2309 2292 return (EAGAIN);
2310 2293 }
2311 2294
2312 2295 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2313 2296 error = SEGOP_LOCKOP(seg, seg->s_base,
2314 2297 seg->s_size, attr, MC_LOCK, mlock_map, pos);
2315 2298 if (error != 0)
2316 2299 break;
2317 2300 pos += seg_pages(seg);
2318 2301 }
2319 2302
2320 2303 if (error) {
2321 2304 for (seg = AS_SEGFIRST(as); seg != NULL;
2322 2305 seg = AS_SEGNEXT(as, seg)) {
2323 2306
2324 2307 raddr = (caddr_t)((uintptr_t)seg->s_base &
2325 2308 (uintptr_t)PAGEMASK);
2326 2309 npages = seg_pages(seg);
2327 2310 as_segunlock(seg, raddr, attr, mlock_map,
2328 2311 idx, npages);
2329 2312 idx += npages;
2330 2313 }
2331 2314 }
2332 2315
2333 2316 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2334 2317 AS_LOCK_EXIT(as);
2335 2318 goto lockerr;
2336 2319 } else if (func == MC_UNLOCKAS) {
2337 2320 mutex_enter(&as->a_contents);
2338 2321 AS_CLRPGLCK(as);
2339 2322 mutex_exit(&as->a_contents);
2340 2323
2341 2324 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2342 2325 error = SEGOP_LOCKOP(seg, seg->s_base,
2343 2326 seg->s_size, attr, MC_UNLOCK, NULL, 0);
2344 2327 if (error != 0)
2345 2328 break;
2346 2329 }
2347 2330
2348 2331 AS_LOCK_EXIT(as);
2349 2332 goto lockerr;
2350 2333 }
2351 2334
2352 2335 /*
2353 2336 * Normalize addresses and sizes.
2354 2337 */
2355 2338 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2356 2339 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2357 2340 (size_t)raddr;
2358 2341
2359 2342 if (raddr + rsize < raddr) { /* check for wraparound */
2360 2343 AS_LOCK_EXIT(as);
2361 2344 return (ENOMEM);
2362 2345 }
2363 2346
2364 2347 /*
2365 2348 * Get initial segment.
2366 2349 */
2367 2350 if ((seg = as_segat(as, raddr)) == NULL) {
2368 2351 AS_LOCK_EXIT(as);
2369 2352 return (ENOMEM);
2370 2353 }
2371 2354
2372 2355 if (func == MC_LOCK) {
2373 2356 mlock_size = BT_BITOUL(btopr(rsize));
2374 2357 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2375 2358 sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2376 2359 AS_LOCK_EXIT(as);
2377 2360 return (EAGAIN);
2378 2361 }
2379 2362 }
2380 2363
2381 2364 /*
2382 2365 * Loop over all segments. If a hole in the address range is
2383 2366 * discovered, then fail. For each segment, perform the appropriate
2384 2367 * control operation.
2385 2368 */
2386 2369 while (rsize != 0) {
2387 2370
2388 2371 /*
2389 2372 * Make sure there's no hole, calculate the portion
2390 2373 * of the next segment to be operated over.
2391 2374 */
2392 2375 if (raddr >= seg->s_base + seg->s_size) {
2393 2376 seg = AS_SEGNEXT(as, seg);
2394 2377 if (seg == NULL || raddr != seg->s_base) {
2395 2378 if (func == MC_LOCK) {
2396 2379 as_unlockerr(as, attr, mlock_map,
2397 2380 initraddr, initrsize - rsize);
2398 2381 kmem_free(mlock_map,
2399 2382 mlock_size * sizeof (ulong_t));
2400 2383 }
2401 2384 AS_LOCK_EXIT(as);
2402 2385 return (ENOMEM);
2403 2386 }
2404 2387 }
2405 2388 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2406 2389 ssize = seg->s_base + seg->s_size - raddr;
2407 2390 else
2408 2391 ssize = rsize;
2409 2392
2410 2393 /*
2411 2394 * Dispatch on specific function.
2412 2395 */
2413 2396 switch (func) {
2414 2397
2415 2398 /*
2416 2399 * Synchronize cached data from mappings with backing
2417 2400 * objects.
2418 2401 */
2419 2402 case MC_SYNC:
2420 2403 if (error = SEGOP_SYNC(seg, raddr, ssize,
2421 2404 attr, (uint_t)arg)) {
2422 2405 AS_LOCK_EXIT(as);
2423 2406 return (error);
2424 2407 }
2425 2408 break;
2426 2409
2427 2410 /*
2428 2411 * Lock pages in memory.
2429 2412 */
2430 2413 case MC_LOCK:
2431 2414 if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2432 2415 attr, func, mlock_map, pos)) {
2433 2416 as_unlockerr(as, attr, mlock_map, initraddr,
2434 2417 initrsize - rsize + ssize);
2435 2418 kmem_free(mlock_map, mlock_size *
2436 2419 sizeof (ulong_t));
2437 2420 AS_LOCK_EXIT(as);
2438 2421 goto lockerr;
2439 2422 }
2440 2423 break;
2441 2424
2442 2425 /*
2443 2426 * Unlock mapped pages.
2444 2427 */
2445 2428 case MC_UNLOCK:
2446 2429 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2447 2430 (ulong_t *)NULL, (size_t)NULL);
2448 2431 break;
2449 2432
2450 2433 /*
2451 2434 * Store VM advise for mapped pages in segment layer.
2452 2435 */
2453 2436 case MC_ADVISE:
2454 2437 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2455 2438
2456 2439 /*
2457 2440 * Check for regular errors and special retry error
2458 2441 */
2459 2442 if (error) {
2460 2443 if (error == IE_RETRY) {
2461 2444 /*
2462 2445 * Need to acquire writers lock, so
2463 2446 * have to drop readers lock and start
2464 2447 * all over again
2465 2448 */
2466 2449 AS_LOCK_EXIT(as);
2467 2450 goto retry;
2468 2451 } else if (error == IE_REATTACH) {
2469 2452 /*
2470 2453 * Find segment for current address
2471 2454 * because current segment just got
2472 2455 * split or concatenated
2473 2456 */
2474 2457 seg = as_segat(as, raddr);
2475 2458 if (seg == NULL) {
2476 2459 AS_LOCK_EXIT(as);
2477 2460 return (ENOMEM);
2478 2461 }
2479 2462 } else {
2480 2463 /*
2481 2464 * Regular error
2482 2465 */
2483 2466 AS_LOCK_EXIT(as);
2484 2467 return (error);
2485 2468 }
2486 2469 }
2487 2470 break;
2488 2471
2489 2472 case MC_INHERIT_ZERO:
2490 2473 if (seg->s_ops->inherit == NULL) {
2491 2474 error = ENOTSUP;
2492 2475 } else {
2493 2476 error = SEGOP_INHERIT(seg, raddr, ssize,
2494 2477 SEGP_INH_ZERO);
2495 2478 }
2496 2479 if (error != 0) {
2497 2480 AS_LOCK_EXIT(as);
2498 2481 return (error);
2499 2482 }
2500 2483 break;
2501 2484
2502 2485 /*
2503 2486 * Can't happen.
2504 2487 */
2505 2488 default:
2506 2489 panic("as_ctl: bad operation %d", func);
2507 2490 /*NOTREACHED*/
2508 2491 }
2509 2492
2510 2493 rsize -= ssize;
2511 2494 raddr += ssize;
2512 2495 }
2513 2496
2514 2497 if (func == MC_LOCK)
2515 2498 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2516 2499 AS_LOCK_EXIT(as);
2517 2500 return (0);
2518 2501 lockerr:
2519 2502
2520 2503 /*
2521 2504 * If the lower levels returned EDEADLK for a segment lockop,
2522 2505 * it means that we should retry the operation. Let's wait
2523 2506 * a bit also to let the deadlock causing condition clear.
2524 2507 * This is part of a gross hack to work around a design flaw
2525 2508 * in the ufs/sds logging code and should go away when the
2526 2509 * logging code is re-designed to fix the problem. See bug
2527 2510 * 4125102 for details of the problem.
2528 2511 */
2529 2512 if (error == EDEADLK) {
2530 2513 delay(deadlk_wait);
2531 2514 error = 0;
2532 2515 goto retry;
2533 2516 }
2534 2517 return (error);
2535 2518 }
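
The MC_LOCK request issued from as_map_locked() above is the typical shape of an as_ctl() call; a minimal sketch of locking and then unlocking a range follows (the work done while locked is a placeholder).

/*
 * Hedged sketch (not part of this file): lock a range of an address
 * space into memory via as_ctl() and release it again.
 */
static int
example_lock_range(struct as *as, caddr_t addr, size_t len)
{
	int error;

	error = as_ctl(as, addr, len, MC_LOCK, 0, 0, NULL, 0);
	if (error != 0)
		return (error);

	/* ... the pages are locked here ... */

	(void) as_ctl(as, addr, len, MC_UNLOCK, 0, 0, NULL, 0);
	return (0);
}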
2536 2519
2537 2520 int
2538 2521 fc_decode(faultcode_t fault_err)
2539 2522 {
2540 2523 int error = 0;
2541 2524
2542 2525 switch (FC_CODE(fault_err)) {
2543 2526 case FC_OBJERR:
2544 2527 error = FC_ERRNO(fault_err);
2545 2528 break;
2546 2529 case FC_PROT:
2547 2530 error = EACCES;
2548 2531 break;
2549 2532 default:
2550 2533 error = EFAULT;
2551 2534 break;
2552 2535 }
2553 2536 return (error);
2554 2537 }
2555 2538
2556 2539 /*
2557 2540 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow
2558 2541 * lists from each segment and copy them to one contiguous shadow list (plist)
2559 2542 * as expected by the caller. Save pointers to per segment shadow lists at
2560 2543 * the tail of plist so that they can be used during as_pageunlock().
2561 2544 */
2562 2545 static int
2563 2546 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2564 2547 caddr_t addr, size_t size, enum seg_rw rw)
2565 2548 {
2566 2549 caddr_t sv_addr = addr;
2567 2550 size_t sv_size = size;
2568 2551 struct seg *sv_seg = seg;
2569 2552 ulong_t segcnt = 1;
2570 2553 ulong_t cnt;
2571 2554 size_t ssize;
2572 2555 pgcnt_t npages = btop(size);
2573 2556 page_t **plist;
2574 2557 page_t **pl;
2575 2558 int error;
2576 2559 caddr_t eaddr;
2577 2560 faultcode_t fault_err = 0;
2578 2561 pgcnt_t pl_off;
2579 2562 extern struct seg_ops segspt_shmops;
2580 2563
2581 2564 ASSERT(AS_LOCK_HELD(as));
2582 2565 ASSERT(seg != NULL);
2583 2566 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2584 2567 ASSERT(addr + size > seg->s_base + seg->s_size);
2585 2568 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2586 2569 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2587 2570
2588 2571 /*
2589 2572 * Count the number of segments covered by the range we are about to
2590 2573 * lock. The segment count is used to size the shadow list we return
2591 2574 * back to the caller.
2592 2575 */
2593 2576 for (; size != 0; size -= ssize, addr += ssize) {
2594 2577 if (addr >= seg->s_base + seg->s_size) {
2595 2578
2596 2579 seg = AS_SEGNEXT(as, seg);
2597 2580 if (seg == NULL || addr != seg->s_base) {
2598 2581 AS_LOCK_EXIT(as);
2599 2582 return (EFAULT);
2600 2583 }
2601 2584 /*
2602 2585 * Do a quick check if subsequent segments
2603 2586 * will most likely support pagelock.
2604 2587 */
2605 2588 if (seg->s_ops == &segvn_ops) {
2606 2589 vnode_t *vp;
2607 2590
2608 2591 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2609 2592 vp != NULL) {
2610 2593 AS_LOCK_EXIT(as);
2611 2594 goto slow;
2612 2595 }
2613 2596 } else if (seg->s_ops != &segspt_shmops) {
2614 2597 AS_LOCK_EXIT(as);
2615 2598 goto slow;
2616 2599 }
2617 2600 segcnt++;
2618 2601 }
2619 2602 if (addr + size > seg->s_base + seg->s_size) {
2620 2603 ssize = seg->s_base + seg->s_size - addr;
2621 2604 } else {
2622 2605 ssize = size;
2623 2606 }
2624 2607 }
2625 2608 ASSERT(segcnt > 1);
2626 2609
2627 2610 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2628 2611
2629 2612 addr = sv_addr;
2630 2613 size = sv_size;
2631 2614 seg = sv_seg;
2632 2615
2633 2616 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2634 2617 if (addr >= seg->s_base + seg->s_size) {
2635 2618 seg = AS_SEGNEXT(as, seg);
2636 2619 ASSERT(seg != NULL && addr == seg->s_base);
2637 2620 cnt++;
2638 2621 ASSERT(cnt < segcnt);
2639 2622 }
2640 2623 if (addr + size > seg->s_base + seg->s_size) {
2641 2624 ssize = seg->s_base + seg->s_size - addr;
2642 2625 } else {
2643 2626 ssize = size;
2644 2627 }
2645 2628 pl = &plist[npages + cnt];
2646 2629 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2647 2630 L_PAGELOCK, rw);
2648 2631 if (error) {
2649 2632 break;
2650 2633 }
2651 2634 ASSERT(plist[npages + cnt] != NULL);
2652 2635 ASSERT(pl_off + btop(ssize) <= npages);
2653 2636 bcopy(plist[npages + cnt], &plist[pl_off],
2654 2637 btop(ssize) * sizeof (page_t *));
2655 2638 pl_off += btop(ssize);
2656 2639 }
2657 2640
2658 2641 if (size == 0) {
2659 2642 AS_LOCK_EXIT(as);
2660 2643 ASSERT(cnt == segcnt - 1);
2661 2644 *ppp = plist;
2662 2645 return (0);
2663 2646 }
2664 2647
2665 2648 /*
2666 2649 	 * One of the pagelock calls failed. The error type is in the error
2667 2650 	 * variable. Unlock what we've locked so far and retry with F_SOFTLOCK
2668 2651 	 * if the error type is either EFAULT or ENOTSUP. Otherwise just return
2669 2652 	 * the error back to the caller.
2670 2653 */
2671 2654
2672 2655 eaddr = addr;
2673 2656 seg = sv_seg;
2674 2657
2675 2658 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2676 2659 if (addr >= seg->s_base + seg->s_size) {
2677 2660 seg = AS_SEGNEXT(as, seg);
2678 2661 ASSERT(seg != NULL && addr == seg->s_base);
2679 2662 cnt++;
2680 2663 ASSERT(cnt < segcnt);
2681 2664 }
2682 2665 if (eaddr > seg->s_base + seg->s_size) {
2683 2666 ssize = seg->s_base + seg->s_size - addr;
2684 2667 } else {
2685 2668 ssize = eaddr - addr;
2686 2669 }
2687 2670 pl = &plist[npages + cnt];
2688 2671 ASSERT(*pl != NULL);
2689 2672 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2690 2673 L_PAGEUNLOCK, rw);
2691 2674 }
2692 2675
2693 2676 AS_LOCK_EXIT(as);
2694 2677
2695 2678 kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2696 2679
2697 2680 if (error != ENOTSUP && error != EFAULT) {
2698 2681 return (error);
2699 2682 }
2700 2683
2701 2684 slow:
2702 2685 /*
2703 2686 * If we are here because pagelock failed due to the need to cow fault
2704 2687 	 * in the pages we want to lock, F_SOFTLOCK will do this job and in
2705 2688 	 * the next as_pagelock() call for this address range pagelock will
2706 2689 * hopefully succeed.
2707 2690 */
2708 2691 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2709 2692 if (fault_err != 0) {
2710 2693 return (fc_decode(fault_err));
2711 2694 }
2712 2695 *ppp = NULL;
2713 2696
2714 2697 return (0);
2715 2698 }
2716 2699
2717 2700 /*
2718 2701 * lock pages in a given address space. Return shadow list. If
2719 2702 * the list is NULL, the MMU mapping is also locked.
2720 2703 */
2721 2704 int
2722 2705 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2723 2706 size_t size, enum seg_rw rw)
2724 2707 {
2725 2708 size_t rsize;
2726 2709 caddr_t raddr;
2727 2710 faultcode_t fault_err;
2728 2711 struct seg *seg;
2729 2712 int err;
2730 2713
2731 2714 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2732 2715 "as_pagelock_start: addr %p size %ld", addr, size);
2733 2716
2734 2717 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2735 2718 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2736 2719 (size_t)raddr;
2737 2720
2738 2721 /*
2739 2722 * if the request crosses two segments let
2740 2723 * as_fault handle it.
2741 2724 */
2742 2725 AS_LOCK_ENTER(as, RW_READER);
2743 2726
2744 2727 seg = as_segat(as, raddr);
2745 2728 if (seg == NULL) {
2746 2729 AS_LOCK_EXIT(as);
2747 2730 return (EFAULT);
2748 2731 }
2749 2732 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2750 2733 if (raddr + rsize > seg->s_base + seg->s_size) {
2751 2734 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2752 2735 }
2753 2736 if (raddr + rsize <= raddr) {
2754 2737 AS_LOCK_EXIT(as);
2755 2738 return (EFAULT);
2756 2739 }
2757 2740
2758 2741 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2759 2742 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2760 2743
2761 2744 /*
2762 2745 * try to lock pages and pass back shadow list
2763 2746 */
2764 2747 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2765 2748
2766 2749 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2767 2750
2768 2751 AS_LOCK_EXIT(as);
2769 2752
2770 2753 if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2771 2754 return (err);
2772 2755 }
2773 2756
2774 2757 /*
2775 2758 * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2776 2759 * to no pagelock support for this segment or pages need to be cow
2777 2760 	 * faulted in. If a fault is needed, F_SOFTLOCK will do this job for
2778 2761 	 * this as_pagelock() call, and in the next as_pagelock() call for the
2779 2762 	 * same address range pagelock will hopefully succeed.
2780 2763 */
2781 2764 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2782 2765 if (fault_err != 0) {
2783 2766 return (fc_decode(fault_err));
2784 2767 }
2785 2768 *ppp = NULL;
2786 2769
2787 2770 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2788 2771 return (0);
2789 2772 }
2790 2773
2791 2774 /*
2792 2775 * unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow
2793 2776 * lists from the end of plist and call pageunlock interface for each segment.
2794 2777 * Drop as lock and free plist.
2795 2778 */
2796 2779 static void
2797 2780 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2798 2781 struct page **plist, enum seg_rw rw)
2799 2782 {
2800 2783 ulong_t cnt;
2801 2784 caddr_t eaddr = addr + size;
2802 2785 pgcnt_t npages = btop(size);
2803 2786 size_t ssize;
2804 2787 page_t **pl;
2805 2788
2806 2789 ASSERT(AS_LOCK_HELD(as));
2807 2790 ASSERT(seg != NULL);
2808 2791 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2809 2792 ASSERT(addr + size > seg->s_base + seg->s_size);
2810 2793 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2811 2794 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2812 2795 ASSERT(plist != NULL);
2813 2796
2814 2797 for (cnt = 0; addr < eaddr; addr += ssize) {
2815 2798 if (addr >= seg->s_base + seg->s_size) {
2816 2799 seg = AS_SEGNEXT(as, seg);
2817 2800 ASSERT(seg != NULL && addr == seg->s_base);
2818 2801 cnt++;
2819 2802 }
2820 2803 if (eaddr > seg->s_base + seg->s_size) {
2821 2804 ssize = seg->s_base + seg->s_size - addr;
2822 2805 } else {
2823 2806 ssize = eaddr - addr;
2824 2807 }
2825 2808 pl = &plist[npages + cnt];
2826 2809 ASSERT(*pl != NULL);
2827 2810 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2828 2811 L_PAGEUNLOCK, rw);
2829 2812 }
2830 2813 ASSERT(cnt > 0);
2831 2814 AS_LOCK_EXIT(as);
2832 2815
2833 2816 cnt++;
2834 2817 kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2835 2818 }
2836 2819
2837 2820 /*
2838 2821 * unlock pages in a given address range
2839 2822 */
2840 2823 void
2841 2824 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2842 2825 enum seg_rw rw)
2843 2826 {
2844 2827 struct seg *seg;
2845 2828 size_t rsize;
2846 2829 caddr_t raddr;
2847 2830
2848 2831 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2849 2832 "as_pageunlock_start: addr %p size %ld", addr, size);
2850 2833
2851 2834 /*
2852 2835 	 * if the shadow list is NULL, as_pagelock
2853 2836 	 * fell back to as_fault
2854 2837 */
2855 2838 if (pp == NULL) {
2856 2839 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2857 2840 return;
2858 2841 }
2859 2842
2860 2843 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2861 2844 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2862 2845 (size_t)raddr;
2863 2846
2864 2847 AS_LOCK_ENTER(as, RW_READER);
2865 2848 seg = as_segat(as, raddr);
2866 2849 ASSERT(seg != NULL);
2867 2850
2868 2851 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2869 2852 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2870 2853
2871 2854 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2872 2855 if (raddr + rsize <= seg->s_base + seg->s_size) {
2873 2856 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2874 2857 } else {
2875 2858 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2876 2859 return;
2877 2860 }
2878 2861 AS_LOCK_EXIT(as);
2879 2862 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2880 2863 }
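
as_pagelock() and as_pageunlock() are meant to bracket an I/O over the same range and rw mode; a minimal sketch follows, with the transfer itself left as a placeholder. If the shadow list comes back NULL, as_pagelock() fell back to F_SOFTLOCK and as_pageunlock() undoes that path as well.

/*
 * Hedged sketch (not part of this file): lock user pages for a
 * physio-style transfer, then drop the locks.
 */
static int
example_pagelock_io(struct as *as, caddr_t addr, size_t len,
    enum seg_rw rw)
{
	struct page **pplist;
	int error;

	error = as_pagelock(as, &pplist, addr, len, rw);
	if (error != 0)
		return (error);

	/* ... perform the transfer against the locked pages ... */

	as_pageunlock(as, pplist, addr, len, rw);
	return (0);
}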
2881 2864
2882 2865 int
2883 2866 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2884 2867 boolean_t wait)
2885 2868 {
2886 2869 struct seg *seg;
2887 2870 size_t ssize;
2888 2871 caddr_t raddr; /* rounded down addr */
2889 2872 size_t rsize; /* rounded up size */
2890 2873 int error = 0;
2891 2874 size_t pgsz = page_get_pagesize(szc);
2892 2875
2893 2876 setpgsz_top:
2894 2877 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2895 2878 return (EINVAL);
2896 2879 }
2897 2880
2898 2881 raddr = addr;
2899 2882 rsize = size;
2900 2883
2901 2884 if (raddr + rsize < raddr) /* check for wraparound */
2902 2885 return (ENOMEM);
2903 2886
2904 2887 AS_LOCK_ENTER(as, RW_WRITER);
2905 2888 as_clearwatchprot(as, raddr, rsize);
2906 2889 seg = as_segat(as, raddr);
2907 2890 if (seg == NULL) {
2908 2891 as_setwatch(as);
2909 2892 AS_LOCK_EXIT(as);
2910 2893 return (ENOMEM);
2911 2894 }
2912 2895
2913 2896 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2914 2897 if (raddr >= seg->s_base + seg->s_size) {
2915 2898 seg = AS_SEGNEXT(as, seg);
2916 2899 if (seg == NULL || raddr != seg->s_base) {
2917 2900 error = ENOMEM;
2918 2901 break;
2919 2902 }
2920 2903 }
2921 2904 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2922 2905 ssize = seg->s_base + seg->s_size - raddr;
2923 2906 } else {
2924 2907 ssize = rsize;
2925 2908 }
2926 2909
2927 2910 retry:
2928 2911 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2929 2912
2930 2913 if (error == IE_NOMEM) {
2931 2914 error = EAGAIN;
2932 2915 break;
2933 2916 }
2934 2917
2935 2918 if (error == IE_RETRY) {
2936 2919 AS_LOCK_EXIT(as);
2937 2920 goto setpgsz_top;
2938 2921 }
2939 2922
2940 2923 if (error == ENOTSUP) {
2941 2924 error = EINVAL;
2942 2925 break;
2943 2926 }
2944 2927
2945 2928 if (wait && (error == EAGAIN)) {
2946 2929 /*
2947 2930 * Memory is currently locked. It must be unlocked
2948 2931 * before this operation can succeed through a retry.
2949 2932 * The possible reasons for locked memory and
2950 2933 * corresponding strategies for unlocking are:
2951 2934 * (1) Normal I/O
2952 2935 * wait for a signal that the I/O operation
2953 2936 * has completed and the memory is unlocked.
2954 2937 * (2) Asynchronous I/O
2955 2938 * The aio subsystem does not unlock pages when
2956 2939 * the I/O is completed. Those pages are unlocked
2957 2940 * when the application calls aiowait/aioerror.
2958 2941 * So, to prevent blocking forever, cv_broadcast()
2959 2942 * is done to wake up aio_cleanup_thread.
2960 2943 * Subsequently, segvn_reclaim will be called, and
2961 2944 * that will do AS_CLRUNMAPWAIT() and wake us up.
2962 2945 * (3) Long term page locking:
2963 2946 * This is not relevant for as_setpagesize()
2964 2947 * because we cannot change the page size for
2965 2948 * driver memory. The attempt to do so will
2966 2949 * fail with a different error than EAGAIN so
2967 2950 * there's no need to trigger as callbacks like
2968 2951 * as_unmap, as_setprot or as_free would do.
2969 2952 */
2970 2953 mutex_enter(&as->a_contents);
2971 2954 if (!AS_ISNOUNMAPWAIT(as)) {
2972 2955 if (AS_ISUNMAPWAIT(as) == 0) {
2973 2956 cv_broadcast(&as->a_cv);
2974 2957 }
2975 2958 AS_SETUNMAPWAIT(as);
2976 2959 AS_LOCK_EXIT(as);
2977 2960 while (AS_ISUNMAPWAIT(as)) {
2978 2961 cv_wait(&as->a_cv, &as->a_contents);
2979 2962 }
2980 2963 } else {
2981 2964 /*
2982 2965 * We may have raced with
2983 2966 * segvn_reclaim()/segspt_reclaim(). In this
2984 2967 * case clean nounmapwait flag and retry since
2985 2968 * softlockcnt in this segment may be already
2986 2969 * 0. We don't drop as writer lock so our
2987 2970 * number of retries without sleeping should
2988 2971 * be very small. See segvn_reclaim() for
2989 2972 * more comments.
2990 2973 */
2991 2974 AS_CLRNOUNMAPWAIT(as);
2992 2975 mutex_exit(&as->a_contents);
2993 2976 goto retry;
2994 2977 }
2995 2978 mutex_exit(&as->a_contents);
2996 2979 goto setpgsz_top;
2997 2980 } else if (error != 0) {
2998 2981 break;
2999 2982 }
3000 2983 }
3001 2984 as_setwatch(as);
3002 2985 AS_LOCK_EXIT(as);
3003 2986 return (error);
3004 2987 }
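
Callers of as_setpagesize() are expected to hand in a range aligned to the requested page size, as the EINVAL check at the top of the function enforces; a minimal sketch:

/*
 * Hedged sketch (not part of this file): request page size code "szc"
 * for an aligned range, waiting for locked memory if necessary.
 */
static int
example_set_pagesize(struct as *as, caddr_t addr, size_t len, uint_t szc)
{
	size_t pgsz = page_get_pagesize(szc);

	if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz))
		return (EINVAL);

	return (as_setpagesize(as, addr, len, szc, B_TRUE));
}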
3005 2988
3006 2989 /*
3007 2990 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
3008 2991 * in its chunk where s_szc is less than the szc we want to set.
3009 2992 */
3010 2993 static int
3011 2994 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3012 2995 int *retry)
3013 2996 {
3014 2997 struct seg *seg;
3015 2998 size_t ssize;
3016 2999 int error;
3017 3000
3018 3001 ASSERT(AS_WRITE_HELD(as));
3019 3002
3020 3003 seg = as_segat(as, raddr);
3021 3004 if (seg == NULL) {
3022 3005 panic("as_iset3_default_lpsize: no seg");
3023 3006 }
3024 3007
3025 3008 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3026 3009 if (raddr >= seg->s_base + seg->s_size) {
3027 3010 seg = AS_SEGNEXT(as, seg);
3028 3011 if (seg == NULL || raddr != seg->s_base) {
3029 3012 panic("as_iset3_default_lpsize: as changed");
3030 3013 }
3031 3014 }
3032 3015 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3033 3016 ssize = seg->s_base + seg->s_size - raddr;
3034 3017 } else {
3035 3018 ssize = rsize;
3036 3019 }
3037 3020
3038 3021 if (szc > seg->s_szc) {
3039 3022 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
3040 3023 /* Only retry on EINVAL segments that have no vnode. */
3041 3024 if (error == EINVAL) {
3042 3025 vnode_t *vp = NULL;
3043 3026 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
3044 3027 (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
3045 3028 vp == NULL)) {
3046 3029 *retry = 1;
3047 3030 } else {
3048 3031 *retry = 0;
3049 3032 }
3050 3033 }
3051 3034 if (error) {
3052 3035 return (error);
3053 3036 }
3054 3037 }
3055 3038 }
3056 3039 return (0);
3057 3040 }
3058 3041
3059 3042 /*
3060 3043 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3061 3044 * pagesize on each segment in its range, but if any fails with EINVAL,
3062 3045 * then it reduces the pagesizes to the next size in the bitmap and
3063 3046 	 * retries as_iset3_default_lpsize(). The code retries smaller allowed
3064 3047 	 * sizes on EINVAL because (a) the anon offset may not
3065 3048 * match the bigger sizes, and (b) it's hard to get this offset (to begin
3066 3049 * with) to pass to map_pgszcvec().
3067 3050 */
3068 3051 static int
3069 3052 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3070 3053 uint_t szcvec)
3071 3054 {
3072 3055 int error;
3073 3056 int retry;
3074 3057
3075 3058 ASSERT(AS_WRITE_HELD(as));
3076 3059
3077 3060 for (;;) {
3078 3061 error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3079 3062 if (error == EINVAL && retry) {
3080 3063 szcvec &= ~(1 << szc);
3081 3064 if (szcvec <= 1) {
3082 3065 return (EINVAL);
3083 3066 }
3084 3067 szc = highbit(szcvec) - 1;
3085 3068 } else {
3086 3069 return (error);
3087 3070 }
3088 3071 }
3089 3072 }
3090 3073
3091 3074 /*
3092 3075 * as_iset1_default_lpsize() breaks its chunk into areas where existing
3093 3076 * segments have a smaller szc than we want to set. For each such area,
3094 3077 	 * it calls as_iset2_default_lpsize().
3095 3078 */
3096 3079 static int
3097 3080 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3098 3081 uint_t szcvec)
3099 3082 {
3100 3083 struct seg *seg;
3101 3084 size_t ssize;
3102 3085 caddr_t setaddr = raddr;
3103 3086 size_t setsize = 0;
3104 3087 int set;
3105 3088 int error;
3106 3089
3107 3090 ASSERT(AS_WRITE_HELD(as));
3108 3091
3109 3092 seg = as_segat(as, raddr);
3110 3093 if (seg == NULL) {
3111 3094 panic("as_iset1_default_lpsize: no seg");
3112 3095 }
3113 3096 if (seg->s_szc < szc) {
3114 3097 set = 1;
3115 3098 } else {
3116 3099 set = 0;
3117 3100 }
3118 3101
3119 3102 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3120 3103 if (raddr >= seg->s_base + seg->s_size) {
3121 3104 seg = AS_SEGNEXT(as, seg);
3122 3105 if (seg == NULL || raddr != seg->s_base) {
3123 3106 panic("as_iset1_default_lpsize: as changed");
3124 3107 }
3125 3108 if (seg->s_szc >= szc && set) {
3126 3109 ASSERT(setsize != 0);
3127 3110 error = as_iset2_default_lpsize(as,
3128 3111 setaddr, setsize, szc, szcvec);
3129 3112 if (error) {
3130 3113 return (error);
3131 3114 }
3132 3115 set = 0;
3133 3116 } else if (seg->s_szc < szc && !set) {
3134 3117 setaddr = raddr;
3135 3118 setsize = 0;
3136 3119 set = 1;
3137 3120 }
3138 3121 }
3139 3122 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3140 3123 ssize = seg->s_base + seg->s_size - raddr;
3141 3124 } else {
3142 3125 ssize = rsize;
3143 3126 }
3144 3127 }
3145 3128 error = 0;
3146 3129 if (set) {
3147 3130 ASSERT(setsize != 0);
3148 3131 error = as_iset2_default_lpsize(as, setaddr, setsize,
3149 3132 szc, szcvec);
3150 3133 }
3151 3134 return (error);
3152 3135 }
3153 3136
3154 3137 /*
3155 3138 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3156 3139 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3157 3140 * chunk to as_iset1_default_lpsize().
3158 3141 */
3159 3142 static int
3160 3143 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3161 3144 int type)
3162 3145 {
3163 3146 int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3164 3147 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3165 3148 flags, rtype, 1);
3166 3149 uint_t szc;
3167 3150 uint_t nszc;
3168 3151 int error;
3169 3152 caddr_t a;
3170 3153 caddr_t eaddr;
3171 3154 size_t segsize;
3172 3155 size_t pgsz;
3173 3156 uint_t save_szcvec;
3174 3157
3175 3158 ASSERT(AS_WRITE_HELD(as));
3176 3159 ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3177 3160 ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3178 3161
3179 3162 szcvec &= ~1;
3180 3163 if (szcvec <= 1) { /* skip if base page size */
3181 3164 return (0);
3182 3165 }
3183 3166
3184 3167 /* Get the pagesize of the first larger page size. */
3185 3168 szc = lowbit(szcvec) - 1;
3186 3169 pgsz = page_get_pagesize(szc);
3187 3170 eaddr = addr + size;
3188 3171 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3189 3172 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3190 3173
3191 3174 save_szcvec = szcvec;
3192 3175 szcvec >>= (szc + 1);
3193 3176 nszc = szc;
3194 3177 while (szcvec) {
3195 3178 if ((szcvec & 0x1) == 0) {
3196 3179 nszc++;
3197 3180 szcvec >>= 1;
3198 3181 continue;
3199 3182 }
3200 3183 nszc++;
3201 3184 pgsz = page_get_pagesize(nszc);
3202 3185 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3203 3186 if (a != addr) {
3204 3187 ASSERT(szc > 0);
3205 3188 ASSERT(a < eaddr);
3206 3189 segsize = a - addr;
3207 3190 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3208 3191 save_szcvec);
3209 3192 if (error) {
3210 3193 return (error);
3211 3194 }
3212 3195 addr = a;
3213 3196 }
3214 3197 szc = nszc;
3215 3198 szcvec >>= 1;
3216 3199 }
3217 3200
3218 3201 ASSERT(addr < eaddr);
3219 3202 szcvec = save_szcvec;
3220 3203 while (szcvec) {
3221 3204 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3222 3205 ASSERT(a >= addr);
3223 3206 if (a != addr) {
3224 3207 ASSERT(szc > 0);
3225 3208 segsize = a - addr;
3226 3209 error = as_iset1_default_lpsize(as, addr, segsize, szc,
3227 3210 save_szcvec);
3228 3211 if (error) {
3229 3212 return (error);
3230 3213 }
3231 3214 addr = a;
3232 3215 }
3233 3216 szcvec &= ~(1 << szc);
3234 3217 if (szcvec) {
3235 3218 szc = highbit(szcvec) - 1;
3236 3219 pgsz = page_get_pagesize(szc);
3237 3220 }
3238 3221 }
3239 3222 ASSERT(addr == eaddr);
3240 3223
3241 3224 return (0);
3242 3225 }
3243 3226
3244 3227 /*
3245 3228 * Set the default large page size for the range. Called via memcntl with
3246 3229 * page size set to 0. as_set_default_lpsize breaks the range down into
3247 3230 	 * chunks with the same type/flags, ignores non-segvn segments, and passes
3248 3231 * each chunk to as_iset_default_lpsize().
3249 3232 */
3250 3233 int
3251 3234 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3252 3235 {
3253 3236 struct seg *seg;
3254 3237 caddr_t raddr;
3255 3238 size_t rsize;
3256 3239 size_t ssize;
3257 3240 int rtype, rflags;
3258 3241 int stype, sflags;
3259 3242 int error;
3260 3243 caddr_t setaddr;
3261 3244 size_t setsize;
3262 3245 int segvn;
3263 3246
3264 3247 if (size == 0)
3265 3248 return (0);
3266 3249
3267 3250 AS_LOCK_ENTER(as, RW_WRITER);
3268 3251 again:
3269 3252 error = 0;
3270 3253
3271 3254 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3272 3255 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3273 3256 (size_t)raddr;
3274 3257
3275 3258 if (raddr + rsize < raddr) { /* check for wraparound */
3276 3259 AS_LOCK_EXIT(as);
3277 3260 return (ENOMEM);
3278 3261 }
3279 3262 as_clearwatchprot(as, raddr, rsize);
3280 3263 seg = as_segat(as, raddr);
3281 3264 if (seg == NULL) {
3282 3265 as_setwatch(as);
3283 3266 AS_LOCK_EXIT(as);
3284 3267 return (ENOMEM);
3285 3268 }
3286 3269 if (seg->s_ops == &segvn_ops) {
3287 3270 rtype = SEGOP_GETTYPE(seg, addr);
3288 3271 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3289 3272 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3290 3273 segvn = 1;
3291 3274 } else {
3292 3275 segvn = 0;
3293 3276 }
3294 3277 setaddr = raddr;
3295 3278 setsize = 0;
3296 3279
3297 3280 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3298 3281 if (raddr >= (seg->s_base + seg->s_size)) {
3299 3282 seg = AS_SEGNEXT(as, seg);
3300 3283 if (seg == NULL || raddr != seg->s_base) {
3301 3284 error = ENOMEM;
3302 3285 break;
3303 3286 }
3304 3287 if (seg->s_ops == &segvn_ops) {
3305 3288 stype = SEGOP_GETTYPE(seg, raddr);
3306 3289 sflags = stype & (MAP_TEXT | MAP_INITDATA);
3307 3290 stype &= (MAP_SHARED | MAP_PRIVATE);
3308 3291 if (segvn && (rflags != sflags ||
3309 3292 rtype != stype)) {
3310 3293 /*
3311 3294 * The next segment is also segvn but
3312 3295 * has different flags and/or type.
3313 3296 */
3314 3297 ASSERT(setsize != 0);
3315 3298 error = as_iset_default_lpsize(as,
3316 3299 setaddr, setsize, rflags, rtype);
3317 3300 if (error) {
3318 3301 break;
3319 3302 }
3320 3303 rflags = sflags;
3321 3304 rtype = stype;
3322 3305 setaddr = raddr;
3323 3306 setsize = 0;
3324 3307 } else if (!segvn) {
3325 3308 rflags = sflags;
3326 3309 rtype = stype;
3327 3310 setaddr = raddr;
3328 3311 setsize = 0;
3329 3312 segvn = 1;
3330 3313 }
3331 3314 } else if (segvn) {
3332 3315 /* The next segment is not segvn. */
3333 3316 ASSERT(setsize != 0);
3334 3317 error = as_iset_default_lpsize(as,
3335 3318 setaddr, setsize, rflags, rtype);
3336 3319 if (error) {
3337 3320 break;
3338 3321 }
3339 3322 segvn = 0;
3340 3323 }
3341 3324 }
3342 3325 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3343 3326 ssize = seg->s_base + seg->s_size - raddr;
3344 3327 } else {
3345 3328 ssize = rsize;
3346 3329 }
3347 3330 }
3348 3331 if (error == 0 && segvn) {
3349 3332 /* The last chunk when rsize == 0. */
3350 3333 ASSERT(setsize != 0);
3351 3334 error = as_iset_default_lpsize(as, setaddr, setsize,
3352 3335 rflags, rtype);
3353 3336 }
3354 3337
3355 3338 if (error == IE_RETRY) {
3356 3339 goto again;
3357 3340 } else if (error == IE_NOMEM) {
3358 3341 error = EAGAIN;
3359 3342 } else if (error == ENOTSUP) {
3360 3343 error = EINVAL;
3361 3344 } else if (error == EAGAIN) {
3362 3345 mutex_enter(&as->a_contents);
3363 3346 if (!AS_ISNOUNMAPWAIT(as)) {
3364 3347 if (AS_ISUNMAPWAIT(as) == 0) {
3365 3348 cv_broadcast(&as->a_cv);
3366 3349 }
3367 3350 AS_SETUNMAPWAIT(as);
3368 3351 AS_LOCK_EXIT(as);
3369 3352 while (AS_ISUNMAPWAIT(as)) {
3370 3353 cv_wait(&as->a_cv, &as->a_contents);
3371 3354 }
3372 3355 mutex_exit(&as->a_contents);
3373 3356 AS_LOCK_ENTER(as, RW_WRITER);
3374 3357 } else {
3375 3358 /*
3376 3359 * We may have raced with
3377 3360 * segvn_reclaim()/segspt_reclaim(). In this case
3378 3361 * clean nounmapwait flag and retry since softlockcnt
3379 3362 * in this segment may be already 0. We don't drop as
3380 3363 * writer lock so our number of retries without
3381 3364 * sleeping should be very small. See segvn_reclaim()
3382 3365 * for more comments.
3383 3366 */
3384 3367 AS_CLRNOUNMAPWAIT(as);
3385 3368 mutex_exit(&as->a_contents);
3386 3369 }
3387 3370 goto again;
3388 3371 }
3389 3372
3390 3373 as_setwatch(as);
3391 3374 AS_LOCK_EXIT(as);
3392 3375 return (error);
3393 3376 }
3394 3377
3395 3378 /*
3396 3379 * Setup all of the uninitialized watched pages that we can.
3397 3380 */
3398 3381 void
3399 3382 as_setwatch(struct as *as)
3400 3383 {
3401 3384 struct watched_page *pwp;
3402 3385 struct seg *seg;
3403 3386 caddr_t vaddr;
3404 3387 uint_t prot;
3405 3388 int err, retrycnt;
3406 3389
3407 3390 if (avl_numnodes(&as->a_wpage) == 0)
3408 3391 return;
3409 3392
3410 3393 ASSERT(AS_WRITE_HELD(as));
3411 3394
3412 3395 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3413 3396 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3414 3397 retrycnt = 0;
3415 3398 retry:
3416 3399 vaddr = pwp->wp_vaddr;
3417 3400 if (pwp->wp_oprot != 0 || /* already set up */
3418 3401 (seg = as_segat(as, vaddr)) == NULL ||
3419 3402 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3420 3403 continue;
3421 3404
3422 3405 pwp->wp_oprot = prot;
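		/*
		 * Compute the watched protections: a read or exec watchpoint
		 * removes all access so that any reference faults, while a
		 * write watchpoint only removes write permission.
		 */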
3423 3406 if (pwp->wp_read)
3424 3407 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3425 3408 if (pwp->wp_write)
3426 3409 prot &= ~PROT_WRITE;
3427 3410 if (pwp->wp_exec)
3428 3411 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3429 3412 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3430 3413 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
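			/*
			 * On IE_RETRY, forget the saved protections and redo
			 * the lookup; the ASSERT documents that this is
			 * expected to happen at most once per page.
			 */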
3431 3414 if (err == IE_RETRY) {
3432 3415 pwp->wp_oprot = 0;
3433 3416 ASSERT(retrycnt == 0);
3434 3417 retrycnt++;
3435 3418 goto retry;
3436 3419 }
3437 3420 }
3438 3421 pwp->wp_prot = prot;
3439 3422 }
3440 3423 }
3441 3424
3442 3425 /*
3443 3426 * Clear all of the watched pages in the address space.
3444 3427 */
3445 3428 void
3446 3429 as_clearwatch(struct as *as)
3447 3430 {
3448 3431 struct watched_page *pwp;
3449 3432 struct seg *seg;
3450 3433 caddr_t vaddr;
3451 3434 uint_t prot;
3452 3435 int err, retrycnt;
3453 3436
3454 3437 if (avl_numnodes(&as->a_wpage) == 0)
3455 3438 return;
3456 3439
3457 3440 ASSERT(AS_WRITE_HELD(as));
3458 3441
3459 3442 for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3460 3443 pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3461 3444 retrycnt = 0;
3462 3445 retry:
3463 3446 vaddr = pwp->wp_vaddr;
3464 3447 if (pwp->wp_oprot == 0 || /* not set up */
3465 3448 (seg = as_segat(as, vaddr)) == NULL)
3466 3449 continue;
3467 3450
3468 3451 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3469 3452 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3470 3453 if (err == IE_RETRY) {
3471 3454 ASSERT(retrycnt == 0);
3472 3455 retrycnt++;
3473 3456 goto retry;
3474 3457 }
3475 3458 }
3476 3459 pwp->wp_oprot = 0;
3477 3460 pwp->wp_prot = 0;
3478 3461 }
3479 3462 }
3480 3463
3481 3464 /*
3482 3465 * Force a new setup for all the watched pages in the range.
3483 3466 */
3484 3467 static void
3485 3468 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3486 3469 {
3487 3470 struct watched_page *pwp;
3488 3471 struct watched_page tpw;
3489 3472 caddr_t eaddr = addr + size;
3490 3473 caddr_t vaddr;
3491 3474 struct seg *seg;
3492 3475 int err, retrycnt;
3493 3476 uint_t wprot;
3494 3477 avl_index_t where;
3495 3478
3496 3479 if (avl_numnodes(&as->a_wpage) == 0)
3497 3480 return;
3498 3481
3499 3482 ASSERT(AS_WRITE_HELD(as));
3500 3483
3501 3484 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3502 3485 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3503 3486 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3504 3487
3505 3488 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3506 3489 retrycnt = 0;
3507 3490 vaddr = pwp->wp_vaddr;
3508 3491
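		/*
		 * Recompute the watched protections from the new base
		 * protections, using the same masking as as_setwatch().
		 */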
3509 3492 wprot = prot;
3510 3493 if (pwp->wp_read)
3511 3494 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3512 3495 if (pwp->wp_write)
3513 3496 wprot &= ~PROT_WRITE;
3514 3497 if (pwp->wp_exec)
3515 3498 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3516 3499 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3517 3500 retry:
3518 3501 seg = as_segat(as, vaddr);
3519 3502 if (seg == NULL) {
3520 3503 panic("as_setwatchprot: no seg");
3521 3504 /*NOTREACHED*/
3522 3505 }
3523 3506 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
3524 3507 if (err == IE_RETRY) {
3525 3508 ASSERT(retrycnt == 0);
3526 3509 retrycnt++;
3527 3510 goto retry;
3528 3511 }
3529 3512 }
3530 3513 pwp->wp_oprot = prot;
3531 3514 pwp->wp_prot = wprot;
3532 3515
3533 3516 pwp = AVL_NEXT(&as->a_wpage, pwp);
3534 3517 }
3535 3518 }
3536 3519
3537 3520 /*
3538 3521 * Clear all of the watched pages in the range.
3539 3522 */
3540 3523 static void
3541 3524 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3542 3525 {
3543 3526 caddr_t eaddr = addr + size;
3544 3527 struct watched_page *pwp;
3545 3528 struct watched_page tpw;
3546 3529 uint_t prot;
3547 3530 struct seg *seg;
3548 3531 int err, retrycnt;
3549 3532 avl_index_t where;
3550 3533
3551 3534 if (avl_numnodes(&as->a_wpage) == 0)
3552 3535 return;
3553 3536
3554 3537 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3555 3538 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3556 3539 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3557 3540
3558 3541 ASSERT(AS_WRITE_HELD(as));
3559 3542
3560 3543 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3561 3544
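		/*
		 * Only pages that were actually set up (wp_oprot != 0) need
		 * their original protections restored.
		 */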
3562 3545 if ((prot = pwp->wp_oprot) != 0) {
3563 3546 retrycnt = 0;
3564 3547
3565 3548 if (prot != pwp->wp_prot) {
3566 3549 retry:
3567 3550 seg = as_segat(as, pwp->wp_vaddr);
3568 3551 if (seg == NULL)
3569 3552 continue;
3570 3553 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3571 3554 PAGESIZE, prot);
3572 3555 if (err == IE_RETRY) {
3573 3556 ASSERT(retrycnt == 0);
3574 3557 retrycnt++;
3575 3558 goto retry;
3576 3559
3577 3560 }
3578 3561 }
3579 3562 pwp->wp_oprot = 0;
3580 3563 pwp->wp_prot = 0;
3581 3564 }
3582 3565
3583 3566 pwp = AVL_NEXT(&as->a_wpage, pwp);
3584 3567 }
3585 3568 }
3586 3569
3587 3570 void
3588 3571 as_signal_proc(struct as *as, k_siginfo_t *siginfo)
3589 3572 {
3590 3573 struct proc *p;
3591 3574
3592 3575 mutex_enter(&pidlock);
3593 3576 for (p = practive; p; p = p->p_next) {
3594 3577 if (p->p_as == as) {
3595 3578 mutex_enter(&p->p_lock);
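			/*
			 * Recheck p_as under p_lock; the check above was made
			 * without p_lock and the process may have switched
			 * address spaces in the meantime.
			 */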
3596 3579 if (p->p_as == as)
3597 3580 sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3598 3581 mutex_exit(&p->p_lock);
3599 3582 }
3600 3583 }
3601 3584 mutex_exit(&pidlock);
3602 3585 }
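
/*
 * Illustrative sketch only (not part of this file): to deliver SIGSEGV to
 * every process sharing an address space, a caller might fill in a
 * k_siginfo_t and hand it to as_signal_proc() roughly like this:
 *
 *	k_siginfo_t si;
 *
 *	bzero(&si, sizeof (si));
 *	si.si_signo = SIGSEGV;
 *	si.si_code = SEGV_MAPERR;
 *	as_signal_proc(as, &si);
 *
 * The signal is queued with KM_NOSLEEP, so delivery is best-effort.
 */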
3603 3586
3604 3587 /*
3605 3588  * Return the memory object ID for the mapping at the given address.
3606 3589 */
3607 3590 int
3608 3591 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3609 3592 {
3610 3593 struct seg *seg;
3611 3594 int sts;
3612 3595
3613 3596 AS_LOCK_ENTER(as, RW_READER);
3614 3597 seg = as_segat(as, addr);
3615 3598 if (seg == NULL) {
3616 3599 AS_LOCK_EXIT(as);
3617 3600 return (EFAULT);
3618 3601 }
3619 3602 /*
3620 3603  * Catch old segment drivers that may not support the getmemid operation.
3621 3604 */
3622 3605 if (seg->s_ops->getmemid == NULL) {
3623 3606 AS_LOCK_EXIT(as);
3624 3607 return (ENODEV);
3625 3608 }
3626 3609
3627 3610 sts = SEGOP_GETMEMID(seg, addr, memidp);
3628 3611
3629 3612 AS_LOCK_EXIT(as);
3630 3613 return (sts);
3631 3614 }
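
/*
 * Illustrative sketch only (not part of this file): a caller needing a
 * stable identity for a user mapping might use as_getmemid() roughly as
 * follows, where uaddr is assumed to be a user address of interest:
 *
 *	memid_t memid;
 *	int err;
 *
 *	err = as_getmemid(curproc->p_as, uaddr, &memid);
 *	if (err != 0)
 *		return (err);
 *
 * As the checks above show, EFAULT means no segment maps uaddr and ENODEV
 * means the segment driver provides no getmemid operation.
 */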
|
↓ open down ↓ |
1886 lines elided |
↑ open up ↑ |