OS-3602 lxbrand LTP recv* tests failing on MSG_ERRQUEUE flag
OS-3600 lxbrand 32bit cannot boot with OS-3594 fix
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3594 lx brand: need support for MAP_32BIT
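For context: MAP_32BIT is a Linux-only mmap(2) flag (x86-64) that asks for the mapping to be placed in the low 2 GB of the address space; the grow.c change below gives the lx brand a hook for honouring it. A minimal sketch of the Linux-side behaviour being emulated, based on the Linux mmap(2) documentation rather than on anything in this diff:

/*
 * Hedged illustration (Linux side, not part of this change): a 64-bit
 * Linux program asking for a low mapping with MAP_32BIT. The lx brand
 * has to produce the same "below 2GB" placement for such requests.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	printf("mapped at %p (below 2GB: %d)\n", p,
	    (uintptr_t)p < (1UL << 31));
	return (0);
}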
--- old/usr/src/uts/common/os/grow.c
+++ new/usr/src/uts/common/os/grow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 -/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
22 +/*
23 + * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
24 + * Copyright (c) 2014, Joyent, Inc. All rights reserved.
25 + */
23 26
24 27 /*
25 28 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 29 * Use is subject to license terms.
27 30 */
28 31
29 32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 33 /* All Rights Reserved */
31 34
32 35 #include <sys/types.h>
33 36 #include <sys/inttypes.h>
34 37 #include <sys/param.h>
35 38 #include <sys/sysmacros.h>
36 39 #include <sys/systm.h>
37 40 #include <sys/signal.h>
38 41 #include <sys/user.h>
39 42 #include <sys/errno.h>
40 43 #include <sys/var.h>
41 44 #include <sys/proc.h>
42 45 #include <sys/tuneable.h>
43 46 #include <sys/debug.h>
44 47 #include <sys/cmn_err.h>
45 48 #include <sys/cred.h>
46 49 #include <sys/vnode.h>
47 50 #include <sys/vfs.h>
48 51 #include <sys/vm.h>
49 52 #include <sys/file.h>
50 53 #include <sys/mman.h>
51 54 #include <sys/vmparam.h>
52 55 #include <sys/fcntl.h>
53 56 #include <sys/lwpchan_impl.h>
54 57 #include <sys/nbmlock.h>
58 +#include <sys/brand.h>
55 59
56 60 #include <vm/hat.h>
57 61 #include <vm/as.h>
58 62 #include <vm/seg.h>
59 63 #include <vm/seg_dev.h>
60 64 #include <vm/seg_vn.h>
61 65
62 66 int use_brk_lpg = 1;
63 67 int use_stk_lpg = 1;
64 68
65 69 static int brk_lpg(caddr_t nva);
66 70 static int grow_lpg(caddr_t sp);
67 71
68 72 int
69 73 brk(caddr_t nva)
70 74 {
71 75 int error;
72 76 proc_t *p = curproc;
73 77
74 78 /*
75 79 * Serialize brk operations on an address space.
76 80 * This also serves as the lock protecting p_brksize
77 81 * and p_brkpageszc.
78 82 */
79 83 as_rangelock(p->p_as);
80 84 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 85 error = brk_lpg(nva);
82 86 } else {
83 87 error = brk_internal(nva, p->p_brkpageszc);
84 88 }
85 89 as_rangeunlock(p->p_as);
86 90 return ((error != 0 ? set_errno(error) : 0));
87 91 }
88 92
89 93 /*
90 94 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 95 * then call brk_internal().
92 96 * Returns 0 on success.
93 97 */
94 98 static int
95 99 brk_lpg(caddr_t nva)
96 100 {
97 101 struct proc *p = curproc;
98 102 size_t pgsz, len;
99 103 caddr_t addr, brkend;
100 104 caddr_t bssbase = p->p_bssbase;
101 105 caddr_t brkbase = p->p_brkbase;
102 106 int oszc, szc;
103 107 int err;
104 108
105 109 oszc = p->p_brkpageszc;
106 110
107 111 /*
108 112 * If p_brkbase has not yet been set, the first call
109 113 * to brk_internal() will initialize it.
110 114 */
111 115 if (brkbase == 0) {
112 116 return (brk_internal(nva, oszc));
113 117 }
114 118
115 119 len = nva - bssbase;
116 120
117 121 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 122 szc = page_szc(pgsz);
119 123
120 124 /*
121 125 * Covers two cases:
122 126 * 1. page_szc() returns -1 for invalid page size, so we want to
123 127 * ignore it in that case.
124 128 * 2. By design we never decrease page size, as it is more stable.
125 129 */
126 130 if (szc <= oszc) {
127 131 err = brk_internal(nva, oszc);
128 132 /* If failed, back off to base page size. */
129 133 if (err != 0 && oszc != 0) {
130 134 err = brk_internal(nva, 0);
131 135 }
132 136 return (err);
133 137 }
134 138
135 139 err = brk_internal(nva, szc);
136 140 /* If using szc failed, map with base page size and return. */
137 141 if (err != 0) {
138 142 if (szc != 0) {
139 143 err = brk_internal(nva, 0);
140 144 }
141 145 return (err);
142 146 }
143 147
144 148 /*
145 149 * Round up brk base to a large page boundary and remap
146 150 * anything in the segment already faulted in beyond that
147 151 * point.
148 152 */
149 153 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 154 brkend = brkbase + p->p_brksize;
151 155 len = brkend - addr;
152 156 /* Check that len is not negative. Update page size code for heap. */
153 157 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 158 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 159 p->p_brkpageszc = szc;
156 160 }
157 161
158 162 ASSERT(err == 0);
159 163 return (err); /* should always be 0 */
160 164 }
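A hedged worked example of the rounding brk_lpg() performs above, using plain arithmetic equivalent to the P2ROUNDUP/IS_P2ALIGNED macros from <sys/sysmacros.h> (the addresses are made up for illustration):

#include <stdio.h>
#include <stdint.h>

/* power-of-two alignment assumed, as the kernel macros require */
#define	MY_P2ROUNDUP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define	MY_IS_P2ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int
main(void)
{
	uintptr_t pgsz = 0x200000;			/* 2MB large page */
	uintptr_t bssbase = 0x08052000;			/* example heap base */
	uintptr_t brkend = 0x08a52000;			/* example current break */
	uintptr_t addr = MY_P2ROUNDUP(bssbase, pgsz);	/* 0x08200000 */
	uintptr_t len = brkend - addr;			/* 0x00852000 */

	/*
	 * len is not a multiple of 2MB here, so as_setpagesize() would be
	 * skipped and p_brkpageszc left unchanged.
	 */
	printf("addr=%lx len=%lx aligned=%d\n", (unsigned long)addr,
	    (unsigned long)len, MY_IS_P2ALIGNED(len, pgsz));
	return (0);
}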
161 165
162 166 /*
163 167 * Returns 0 on success.
164 168 */
165 169 int
166 170 brk_internal(caddr_t nva, uint_t brkszc)
167 171 {
168 172 caddr_t ova; /* current break address */
169 173 size_t size;
170 174 int error;
171 175 struct proc *p = curproc;
172 176 struct as *as = p->p_as;
173 177 size_t pgsz;
174 178 uint_t szc;
175 179 rctl_qty_t as_rctl;
176 180
177 181 /*
178 182 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 183 * for the newly created segment. This allows the new extension
180 184 * segment to be concatenated successfully with the existing brk
181 185 * segment.
182 186 */
183 187 if ((szc = brkszc) != 0) {
184 188 pgsz = page_get_pagesize(szc);
185 189 ASSERT(pgsz > PAGESIZE);
186 190 } else {
187 191 pgsz = PAGESIZE;
188 192 }
189 193
190 194 mutex_enter(&p->p_lock);
191 195 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 196 p->p_rctls, p);
193 197 mutex_exit(&p->p_lock);
194 198
195 199 /*
196 200 * If p_brkbase has not yet been set, the first call
197 201 * to brk() will initialize it.
198 202 */
199 203 if (p->p_brkbase == 0)
200 204 p->p_brkbase = nva;
201 205
202 206 /*
203 207 * Before multiple page size support existed p_brksize was the value
204 208 * not rounded to the pagesize (i.e. it stored the exact user request
205 209 * for heap size). If pgsz is greater than PAGESIZE calculate the
206 210 * heap size as the real new heap size by rounding it up to pgsz.
207 211 * This is useful since we may want to know where the heap ends
208 212 * without knowing heap pagesize (e.g. some old code) and also if
209 213 * heap pagesize changes we can update p_brkpageszc but delay adding
210 214 * new mapping yet still know from p_brksize where the heap really
211 215 * ends. The user requested heap end is stored in libc variable.
212 216 */
213 217 if (pgsz > PAGESIZE) {
214 218 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 219 size = tnva - p->p_brkbase;
216 220 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 221 size > (size_t)as_rctl)) {
218 222 szc = 0;
219 223 pgsz = PAGESIZE;
220 224 size = nva - p->p_brkbase;
221 225 }
222 226 } else {
223 227 size = nva - p->p_brkbase;
224 228 }
225 229
226 230 /*
227 231 * use PAGESIZE to roundup ova because we want to know the real value
228 232 * of the current heap end in case p_brkpageszc changes since the last
229 233 * p_brksize was computed.
230 234 */
231 235 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 236 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 237 PAGESIZE);
234 238
235 239 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 240 size > as_rctl)) {
237 241 mutex_enter(&p->p_lock);
238 242 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 243 RCA_SAFE);
240 244 mutex_exit(&p->p_lock);
241 245 return (ENOMEM);
242 246 }
243 247
244 248 if (nva > ova) {
245 249 struct segvn_crargs crargs =
246 250 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247 251
248 252 if (!(p->p_datprot & PROT_EXEC)) {
249 253 crargs.prot &= ~PROT_EXEC;
250 254 }
251 255
252 256 /*
253 257 * Add new zfod mapping to extend UNIX data segment
254 258 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 259 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 260 * page sizes if ova is not aligned to szc's pgsz.
257 261 */
258 262 if (szc > 0) {
259 263 caddr_t rbss;
260 264
261 265 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 266 pgsz);
263 267 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 268 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 269 AS_MAP_NO_LPOOB;
266 270 } else if (ova == rbss) {
267 271 crargs.szc = szc;
268 272 } else {
269 273 crargs.szc = AS_MAP_HEAP;
270 274 }
271 275 } else {
272 276 crargs.szc = AS_MAP_NO_LPOOB;
273 277 }
274 278 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 279 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 280 &crargs);
277 281 if (error) {
278 282 return (error);
279 283 }
280 284
281 285 } else if (nva < ova) {
282 286 /*
283 287 * Release mapping to shrink UNIX data segment.
284 288 */
285 289 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 290 }
287 291 p->p_brksize = size;
288 292 return (0);
289 293 }
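A hedged user-level illustration of the RLIMIT_DATA path in brk_internal(): once the requested heap size would exceed the data rctl, the kernel returns ENOMEM and sbrk(3C) reports failure (exactly when it fails depends on how much heap the process is already using):

#include <sys/resource.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct rlimit rl = { 1024 * 1024, 1024 * 1024 };	/* 1MB data limit */

	(void) setrlimit(RLIMIT_DATA, &rl);
	if (sbrk(16 * 1024 * 1024) == (void *)-1)
		printf("sbrk failed as expected: %s\n", strerror(errno));
	return (0);
}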
290 294
291 295 /*
292 296 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 297 * This routine assumes that the stack grows downward.
294 298 */
295 299 int
296 300 grow(caddr_t sp)
297 301 {
298 302 struct proc *p = curproc;
299 303 struct as *as = p->p_as;
300 304 size_t oldsize = p->p_stksize;
301 305 size_t newsize;
302 306 int err;
303 307
304 308 /*
305 309 * Serialize grow operations on an address space.
306 310 * This also serves as the lock protecting p_stksize
307 311 * and p_stkpageszc.
308 312 */
309 313 as_rangelock(as);
310 314 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 315 err = grow_lpg(sp);
312 316 } else {
313 317 err = grow_internal(sp, p->p_stkpageszc);
314 318 }
315 319 as_rangeunlock(as);
316 320
317 321 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 322 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 323 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 324 /*
321 325 * Set up translations so the process doesn't have to fault in
322 326 * the stack pages we just gave it.
323 327 */
324 328 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 329 newsize - oldsize, F_INVAL, S_WRITE);
326 330 }
327 331 return ((err == 0 ? 1 : 0));
328 332 }
329 333
330 334 /*
331 335 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 336 * then call grow_internal().
333 337 * Returns 0 on success.
334 338 */
335 339 static int
336 340 grow_lpg(caddr_t sp)
337 341 {
338 342 struct proc *p = curproc;
339 343 size_t pgsz;
340 344 size_t len, newsize;
341 345 caddr_t addr, saddr;
342 346 caddr_t growend;
343 347 int oszc, szc;
344 348 int err;
345 349
346 350 newsize = p->p_usrstack - sp;
347 351
348 352 oszc = p->p_stkpageszc;
349 353 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 354 szc = page_szc(pgsz);
351 355
352 356 /*
353 357 * Covers two cases:
354 358 * 1. page_szc() returns -1 for invalid page size, so we want to
355 359 * ignore it in that case.
356 360 * 2. By design we never decrease page size, as it is more stable.
357 361 * This shouldn't happen as the stack never shrinks.
358 362 */
359 363 if (szc <= oszc) {
360 364 err = grow_internal(sp, oszc);
361 365 /* failed, fall back to base page size */
362 366 if (err != 0 && oszc != 0) {
363 367 err = grow_internal(sp, 0);
364 368 }
365 369 return (err);
366 370 }
367 371
368 372 /*
369 373 * We've grown sufficiently to switch to a new page size.
370 374 * So we are going to remap the whole segment with the new page size.
371 375 */
372 376 err = grow_internal(sp, szc);
373 377 /* The grow with szc failed, so fall back to base page size. */
374 378 if (err != 0) {
375 379 if (szc != 0) {
376 380 err = grow_internal(sp, 0);
377 381 }
378 382 return (err);
379 383 }
380 384
381 385 /*
382 386 * Round up stack pointer to a large page boundary and remap
383 387 * any pgsz pages in the segment already faulted in beyond that
384 388 * point.
385 389 */
386 390 saddr = p->p_usrstack - p->p_stksize;
387 391 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 392 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 393 len = growend - addr;
390 394 /* Check that len is not negative. Update page size code for stack. */
391 395 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 396 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 397 p->p_stkpageszc = szc;
394 398 }
395 399
396 400 ASSERT(err == 0);
397 401 return (err); /* should always be 0 */
398 402 }
399 403
400 404 /*
401 405 * This routine assumes that the stack grows downward.
402 406 * Returns 0 on success, errno on failure.
403 407 */
404 408 int
405 409 grow_internal(caddr_t sp, uint_t growszc)
406 410 {
407 411 struct proc *p = curproc;
408 412 size_t newsize;
409 413 size_t oldsize;
410 414 int error;
411 415 size_t pgsz;
412 416 uint_t szc;
413 417 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414 418
415 419 ASSERT(sp < p->p_usrstack);
416 420 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417 421
418 422 /*
419 423 * grow to growszc alignment but use current p->p_stkpageszc for
420 424 * the segvn_crargs szc passed to segvn_create. For memcntl to
421 425 * increase the szc, this allows the new extension segment to be
422 426 * concatenated successfully with the existing stack segment.
423 427 */
424 428 if ((szc = growszc) != 0) {
425 429 pgsz = page_get_pagesize(szc);
426 430 ASSERT(pgsz > PAGESIZE);
427 431 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 432 if (newsize > (size_t)p->p_stk_ctl) {
429 433 szc = 0;
430 434 pgsz = PAGESIZE;
431 435 newsize = p->p_usrstack - sp;
432 436 }
433 437 } else {
434 438 pgsz = PAGESIZE;
435 439 newsize = p->p_usrstack - sp;
436 440 }
437 441
438 442 if (newsize > (size_t)p->p_stk_ctl) {
439 443 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 444 RCA_UNSAFE_ALL);
441 445
442 446 return (ENOMEM);
443 447 }
444 448
445 449 oldsize = p->p_stksize;
446 450 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447 451
448 452 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 453 return (0);
450 454 }
451 455
452 456 if (!(p->p_stkprot & PROT_EXEC)) {
453 457 crargs.prot &= ~PROT_EXEC;
454 458 }
455 459 /*
456 460 * extend stack with the proposed new growszc, which is different
457 461 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 462 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 463 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 464 * if not aligned to szc's pgsz.
461 465 */
462 466 if (szc > 0) {
463 467 caddr_t oldsp = p->p_usrstack - oldsize;
464 468 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 469 pgsz);
466 470
467 471 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 472 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 473 AS_MAP_NO_LPOOB;
470 474 } else if (oldsp == austk) {
471 475 crargs.szc = szc;
472 476 } else {
473 477 crargs.szc = AS_MAP_STACK;
474 478 }
475 479 } else {
476 480 crargs.szc = AS_MAP_NO_LPOOB;
477 481 }
478 482 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479 483
480 484 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 485 segvn_create, &crargs)) != 0) {
482 486 if (error == EAGAIN) {
483 487 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 488 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 489 }
486 490 return (error);
487 491 }
488 492 p->p_stksize = newsize;
489 493 return (0);
490 494 }
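A hedged user-level view of grow()/grow_internal(): stack pages are faulted in automatically as the stack pointer moves down, until the requested size would exceed RLIMIT_STACK, at which point the rctl action above fires and the process typically takes SIGSEGV. A sketch, built without optimization so the recursion is not elided:

#include <sys/resource.h>
#include <stdio.h>

static long
recurse(long depth)
{
	volatile char pad[4096];	/* force roughly one new page per frame */

	pad[0] = (char)depth;
	return (pad[0] + recurse(depth + 1));	/* never returns normally */
}

int
main(void)
{
	struct rlimit rl;

	(void) getrlimit(RLIMIT_STACK, &rl);
	printf("soft stack limit: %ld bytes\n", (long)rl.rlim_cur);
	return ((int)recurse(0));	/* eventually dies with SIGSEGV */
}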
491 495
492 496 /*
493 497 * Find address for user to map.
494 498 * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 499 * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 500 * a way to try and get a preferred address.
497 501 */
498 502 int
499 503 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 504 int vacalign, uint_t flags)
501 505 {
502 506 #if defined(__amd64)
503 507 proc_t *p = curproc;
504 508 #endif
505 509 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
506 510 size_t lenp;
507 511
508 512 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
509 513
510 514 /*
511 515 * If we have been provided a hint, we should still expand the lenp
512 516 * to be the rest of the address space. This will allow us to
513 517 * treat the hint as a strong desire to be "nearby" the provided
514 518 * address. If we can't satisfy the hint, as_gap() will walk forward.
515 519 */
516 520 if (flags & _MAP_LOW32)
517 521 lenp = (caddr_t)USERLIMIT32 - basep;
518 522 #if defined(__amd64)
519 523 else if (p->p_model == DATAMODEL_NATIVE)
520 524 lenp = p->p_usrstack - basep -
521 525 ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
522 526 #endif
523 527 else
524 528 lenp = as->a_userlimit - basep;
525 529
526 530 if (flags & MAP_FIXED) {
527 531 (void) as_unmap(as, *addrp, len);
528 532 return (0);
529 533 } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
530 534 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
531 535 /* User supplied address was available */
532 536 *addrp = basep;
533 537 } else {
534 538 /*
535 539 * No user supplied address or the address supplied was not
536 540 * available.
537 541 */
538 542 map_addr(addrp, len, off, vacalign, flags);
539 543 }
540 544 if (*addrp == NULL)
541 545 return (ENOMEM);
542 546 return (0);
543 547 }
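A hedged usage example of the hint behaviour choose_addr() implements: without MAP_FIXED, a non-NULL address is only a preference, and the returned mapping may land elsewhere if the requested range is unavailable.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	void *hint = (void *)0x60000000;	/* arbitrary example hint */
	void *p = mmap(hint, 65536, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (p != MAP_FAILED)
		printf("asked near %p, got %p\n", hint, p);
	return (0);
}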
544 548
549 +caddr_t
550 +map_userlimit(proc_t *pp, struct as *as, int flags)
551 +{
552 + if (flags & _MAP_LOW32) {
553 + if (PROC_IS_BRANDED(pp) && BROP(pp)->b_map32limit != NULL) {
554 + return ((caddr_t)(uintptr_t)BROP(pp)->b_map32limit(pp));
555 + } else {
556 + return ((caddr_t)_userlimit32);
557 + }
558 + }
545 559
560 + return (as->a_userlimit);
561 +}
562 +
563 +
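The new map_userlimit() above is the brand hook at the heart of this change: for _MAP_LOW32 requests it defers to the brand's b_map32limit callback when one is registered, and otherwise falls back to _userlimit32. A hypothetical sketch of such a callback follows; the function name, return type, and the 2GB constant are illustrative assumptions, not taken from this diff or from the lx brand sources.

/*
 * Hypothetical b_map32limit implementation (illustrative only): a brand
 * emulating Linux MAP_32BIT could cap _MAP_LOW32 searches at 2GB rather
 * than the native _userlimit32. The real callback signature lives in
 * <sys/brand.h>; map_userlimit() above only assumes it yields an
 * address-sized value.
 */
#include <sys/types.h>
#include <sys/proc.h>

static uint32_t
mybrand_map32limit(proc_t *p)
{
	return (0x80000000U);		/* low 2GB of the address space */
}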
546 564 /*
547 565 * Used for MAP_ANON - fast way to get anonymous pages
548 566 */
549 567 static int
550 568 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
551 569 offset_t pos)
552 570 {
553 571 struct segvn_crargs vn_a;
554 572 int error;
555 573
556 574 if (((PROT_ALL & uprot) != uprot))
557 575 return (EACCES);
558 576
559 577 if ((flags & MAP_FIXED) != 0) {
560 - caddr_t userlimit;
561 -
562 578 /*
563 579 * Use the user address. First verify that
564 580 * the address to be used is page aligned.
565 581 * Then make some simple bounds checks.
566 582 */
567 583 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
568 584 return (EINVAL);
569 585
570 - userlimit = flags & _MAP_LOW32 ?
571 - (caddr_t)USERLIMIT32 : as->a_userlimit;
572 - switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
586 + switch (valid_usr_range(*addrp, len, uprot, as,
587 + map_userlimit(as->a_proc, as, flags))) {
573 588 case RANGE_OKAY:
574 589 break;
575 590 case RANGE_BADPROT:
576 591 return (ENOTSUP);
577 592 case RANGE_BADADDR:
578 593 default:
579 594 return (ENOMEM);
580 595 }
581 596 }
582 597 /*
583 598 * No need to worry about vac alignment for anonymous
584 599 * pages since this is a "clone" object that doesn't
585 600 * yet exist.
586 601 */
587 602 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
588 603 if (error != 0) {
589 604 return (error);
590 605 }
591 606
592 607 /*
593 608 * Use the seg_vn segment driver; passing in the NULL amp
594 609 * gives the desired "cloning" effect.
595 610 */
596 611 vn_a.vp = NULL;
597 612 vn_a.offset = 0;
598 613 vn_a.type = flags & MAP_TYPE;
599 614 vn_a.prot = uprot;
600 615 vn_a.maxprot = PROT_ALL;
601 616 vn_a.flags = flags & ~MAP_TYPE;
602 617 vn_a.cred = CRED();
603 618 vn_a.amp = NULL;
604 619 vn_a.szc = 0;
605 620 vn_a.lgrp_mem_policy_flags = 0;
606 621
607 622 return (as_map(as, *addrp, len, segvn_create, &vn_a));
608 623 }
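A hedged demonstration of the MAP_FIXED alignment check in zmap(): a fixed anonymous request at a non page-aligned address is rejected with EINVAL before any range checking is done.

#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	void *p = mmap((void *)0x10000001, 4096, PROT_READ,
	    MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);

	if (p == MAP_FAILED && errno == EINVAL)
		printf("unaligned MAP_FIXED rejected: %s\n", strerror(errno));
	return (0);
}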
609 624
610 625 static int
611 626 smmap_common(caddr_t *addrp, size_t len,
612 627 int prot, int flags, struct file *fp, offset_t pos)
613 628 {
614 629 struct vnode *vp;
615 630 struct as *as = curproc->p_as;
616 631 uint_t uprot, maxprot, type;
617 632 int error;
618 633 int in_crit = 0;
619 634
620 635 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
621 636 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
622 637 MAP_TEXT | MAP_INITDATA)) != 0) {
623 638 /* | MAP_RENAME */ /* not implemented, let user know */
624 639 return (EINVAL);
625 640 }
626 641
627 642 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
628 643 return (EINVAL);
629 644 }
630 645
631 646 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
632 647 return (EINVAL);
633 648 }
634 649
635 650 #if defined(__sparc)
636 651 /*
637 652 * See if this is an "old mmap call". If so, remember this
638 653 * fact and convert the flags value given to mmap to indicate
639 654 * the specified address in the system call must be used.
640 655 	 * _MAP_NEW is set by all new uses of mmap.
641 656 */
642 657 if ((flags & _MAP_NEW) == 0)
643 658 flags |= MAP_FIXED;
644 659 #endif
645 660 flags &= ~_MAP_NEW;
646 661
647 662 type = flags & MAP_TYPE;
648 663 if (type != MAP_PRIVATE && type != MAP_SHARED)
649 664 return (EINVAL);
650 665
651 666
652 667 if (flags & MAP_ALIGN) {
653 668
654 669 if (flags & MAP_FIXED)
655 670 return (EINVAL);
656 671
657 672 /* alignment needs to be a power of 2 >= page size */
658 673 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
659 674 !ISP2((uintptr_t)*addrp))
660 675 return (EINVAL);
661 676 }
662 677 /*
663 678 * Check for bad lengths and file position.
664 679 * We let the VOP_MAP routine check for negative lengths
665 680 * since on some vnode types this might be appropriate.
666 681 */
667 682 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
668 683 return (EINVAL);
669 684
670 685 maxprot = PROT_ALL; /* start out allowing all accesses */
671 686 uprot = prot | PROT_USER;
672 687
673 688 if (fp == NULL) {
674 689 ASSERT(flags & MAP_ANON);
675 690 /* discard lwpchan mappings, like munmap() */
676 691 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
677 692 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
678 693 as_rangelock(as);
679 694 error = zmap(as, addrp, len, uprot, flags, pos);
680 695 as_rangeunlock(as);
681 696 /*
682 697 * Tell machine specific code that lwp has mapped shared memory
683 698 */
684 699 if (error == 0 && (flags & MAP_SHARED)) {
685 700 /* EMPTY */
686 701 LWP_MMODEL_SHARED_AS(*addrp, len);
687 702 }
688 703 return (error);
689 704 } else if ((flags & MAP_ANON) != 0)
690 705 return (EINVAL);
691 706
692 707 vp = fp->f_vnode;
693 708
694 709 /* Can't execute code from "noexec" mounted filesystem. */
695 710 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
696 711 maxprot &= ~PROT_EXEC;
697 712
698 713 /*
699 714 * These checks were added as part of large files.
700 715 *
701 716 * Return ENXIO if the initial position is negative; return EOVERFLOW
702 717 * if (offset + len) would overflow the maximum allowed offset for the
703 718 * type of file descriptor being used.
704 719 */
705 720 if (vp->v_type == VREG) {
706 721 if (pos < 0)
707 722 return (ENXIO);
708 723 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
709 724 return (EOVERFLOW);
710 725 }
711 726
712 727 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
713 728 /* no write access allowed */
714 729 maxprot &= ~PROT_WRITE;
715 730 }
716 731
717 732 /*
718 733 * XXX - Do we also adjust maxprot based on protections
719 734 * of the vnode? E.g. if no execute permission is given
720 735 * on the vnode for the current user, maxprot probably
721 736 * should disallow PROT_EXEC also? This is different
722 737 * from the write access as this would be a per vnode
723 738 * test as opposed to a per fd test for writability.
724 739 */
725 740
726 741 /*
727 742 * Verify that the specified protections are not greater than
728 743 * the maximum allowable protections. Also test to make sure
729 744 	 * that the file descriptor does allow for read access since
730 745 * "write only" mappings are hard to do since normally we do
731 746 * the read from the file before the page can be written.
732 747 */
733 748 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
734 749 return (EACCES);
735 750
736 751 /*
737 752 * If the user specified an address, do some simple checks here
738 753 */
739 754 if ((flags & MAP_FIXED) != 0) {
740 - caddr_t userlimit;
741 -
742 755 /*
743 756 * Use the user address. First verify that
744 757 * the address to be used is page aligned.
745 758 * Then make some simple bounds checks.
746 759 */
747 760 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
748 761 return (EINVAL);
749 -
750 - userlimit = flags & _MAP_LOW32 ?
751 - (caddr_t)USERLIMIT32 : as->a_userlimit;
752 - switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
762 + switch (valid_usr_range(*addrp, len, uprot, as,
763 + map_userlimit(curproc, as, flags))) {
753 764 case RANGE_OKAY:
754 765 break;
755 766 case RANGE_BADPROT:
756 767 return (ENOTSUP);
757 768 case RANGE_BADADDR:
758 769 default:
759 770 return (ENOMEM);
760 771 }
761 772 }
762 773
763 774 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
764 775 nbl_need_check(vp)) {
765 776 int svmand;
766 777 nbl_op_t nop;
767 778
768 779 nbl_start_crit(vp, RW_READER);
769 780 in_crit = 1;
770 781 error = nbl_svmand(vp, fp->f_cred, &svmand);
771 782 if (error != 0)
772 783 goto done;
773 784 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
774 785 if (prot & (PROT_READ | PROT_EXEC)) {
775 786 nop = NBL_READWRITE;
776 787 } else {
777 788 nop = NBL_WRITE;
778 789 }
779 790 } else {
780 791 nop = NBL_READ;
781 792 }
782 793 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
783 794 error = EACCES;
784 795 goto done;
785 796 }
786 797 }
787 798
788 799 /* discard lwpchan mappings, like munmap() */
789 800 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
790 801 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
791 802
792 803 /*
793 804 * Ok, now let the vnode map routine do its thing to set things up.
794 805 */
795 806 error = VOP_MAP(vp, pos, as,
796 807 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
797 808
798 809 if (error == 0) {
799 810 /*
800 811 * Tell machine specific code that lwp has mapped shared memory
801 812 */
802 813 if (flags & MAP_SHARED) {
803 814 /* EMPTY */
804 815 LWP_MMODEL_SHARED_AS(*addrp, len);
805 816 }
806 817 if (vp->v_type == VREG &&
807 818 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
808 819 /*
809 820 * Mark this as an executable vnode
810 821 */
811 822 mutex_enter(&vp->v_lock);
812 823 vp->v_flag |= VVMEXEC;
813 824 mutex_exit(&vp->v_lock);
814 825 }
815 826 }
816 827
817 828 done:
818 829 if (in_crit)
819 830 nbl_end_crit(vp);
820 831 return (error);
821 832 }
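A hedged illustration of the maxprot logic in smmap_common(): because a descriptor opened read-only has PROT_WRITE cleared from maxprot, a PROT_WRITE, MAP_SHARED mapping of it fails with EACCES.

#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/etc/passwd", O_RDONLY);	/* any readable file */
	void *p;

	if (fd < 0)
		return (1);
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED && errno == EACCES)
		printf("write mapping of read-only fd refused (EACCES)\n");
	(void) close(fd);
	return (0);
}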
822 833
823 834 #ifdef _LP64
824 835 /*
825 836 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
826 837 *
827 838 * The "large file" mmap routine mmap64(2) is also mapped to this routine
828 839 * by the 64-bit version of libc.
829 840 *
830 841 * Eventually, this should be the only version, and have smmap_common()
831 842 * folded back into it again. Some day.
832 843 */
833 844 caddr_t
834 845 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
835 846 {
836 847 struct file *fp;
837 848 int error;
838 849
839 850 if (fd == -1 && (flags & MAP_ANON) != 0)
840 851 error = smmap_common(&addr, len, prot, flags,
841 852 NULL, (offset_t)pos);
842 853 else if ((fp = getf(fd)) != NULL) {
843 854 error = smmap_common(&addr, len, prot, flags,
844 855 fp, (offset_t)pos);
845 856 releasef(fd);
846 857 } else
847 858 error = EBADF;
848 859
849 860 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
850 861 }
851 862 #endif /* _LP64 */
852 863
853 864 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
854 865
855 866 /*
856 867 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
857 868 */
858 869 caddr_t
859 870 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
860 871 {
861 872 struct file *fp;
862 873 int error;
863 874 caddr_t a = (caddr_t)(uintptr_t)addr;
864 875
865 876 if (flags & _MAP_LOW32)
866 877 error = EINVAL;
867 878 else if (fd == -1 && (flags & MAP_ANON) != 0)
868 879 error = smmap_common(&a, (size_t)len, prot,
869 880 flags | _MAP_LOW32, NULL, (offset_t)pos);
870 881 else if ((fp = getf(fd)) != NULL) {
871 882 error = smmap_common(&a, (size_t)len, prot,
872 883 flags | _MAP_LOW32, fp, (offset_t)pos);
873 884 releasef(fd);
874 885 } else
875 886 error = EBADF;
876 887
877 888 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
878 889
879 890 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
880 891 }
881 892
882 893 /*
883 894 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
884 895 *
885 896 * Now things really get ugly because we can't use the C-style
886 897 * calling convention for more than 6 args, and 64-bit parameter
887 898 * passing on 32-bit systems is less than clean.
888 899 */
889 900
890 901 struct mmaplf32a {
891 902 caddr_t addr;
892 903 size_t len;
893 904 #ifdef _LP64
894 905 /*
895 906 * 32-bit contents, 64-bit cells
896 907 */
897 908 uint64_t prot;
898 909 uint64_t flags;
899 910 uint64_t fd;
900 911 uint64_t offhi;
901 912 uint64_t offlo;
902 913 #else
903 914 /*
904 915 * 32-bit contents, 32-bit cells
905 916 */
906 917 uint32_t prot;
907 918 uint32_t flags;
908 919 uint32_t fd;
909 920 uint32_t offhi;
910 921 uint32_t offlo;
911 922 #endif
912 923 };
913 924
914 925 int
915 926 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
916 927 {
917 928 struct file *fp;
918 929 int error;
919 930 caddr_t a = uap->addr;
920 931 int flags = (int)uap->flags;
921 932 int fd = (int)uap->fd;
922 933 #ifdef _BIG_ENDIAN
923 934 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
924 935 #else
925 936 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
926 937 #endif
927 938
928 939 if (flags & _MAP_LOW32)
929 940 error = EINVAL;
930 941 else if (fd == -1 && (flags & MAP_ANON) != 0)
931 942 error = smmap_common(&a, uap->len, (int)uap->prot,
932 943 flags | _MAP_LOW32, NULL, off);
933 944 else if ((fp = getf(fd)) != NULL) {
934 945 error = smmap_common(&a, uap->len, (int)uap->prot,
935 946 flags | _MAP_LOW32, fp, off);
936 947 releasef(fd);
937 948 } else
938 949 error = EBADF;
939 950
940 951 if (error == 0)
941 952 rvp->r_val1 = (uintptr_t)a;
942 953 return (error);
943 954 }
944 955
945 956 #endif /* _SYSCALL32_IMPL || _ILP32 */
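A hedged worked example of the offhi/offlo recombination smmaplf32() performs above: whichever 32-bit cell the caller's ABI puts the high half in, shifting that half up by 32 and OR-ing in the other rebuilds the 64-bit offset.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t hi = 0x00000001;	/* high 32 bits of a 6GB offset */
	uint32_t lo = 0x80000000;	/* low 32 bits */
	uint64_t off = ((uint64_t)hi << 32) | (uint64_t)lo;

	printf("off = 0x%llx (%llu bytes)\n",
	    (unsigned long long)off, (unsigned long long)off);
	return (0);
}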
946 957
947 958 int
948 959 munmap(caddr_t addr, size_t len)
949 960 {
950 961 struct proc *p = curproc;
951 962 struct as *as = p->p_as;
952 963
953 964 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
954 965 return (set_errno(EINVAL));
955 966
956 967 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
957 968 return (set_errno(EINVAL));
958 969
959 970 /*
960 971 * Discard lwpchan mappings.
961 972 */
962 973 if (p->p_lcp != NULL)
963 974 lwpchan_delete_mapping(p, addr, addr + len);
964 975 if (as_unmap(as, addr, len) != 0)
965 976 return (set_errno(EINVAL));
966 977
967 978 return (0);
968 979 }
969 980
970 981 int
971 982 mprotect(caddr_t addr, size_t len, int prot)
972 983 {
973 984 struct as *as = curproc->p_as;
974 985 uint_t uprot = prot | PROT_USER;
975 986 int error;
976 987
977 988 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
978 989 return (set_errno(EINVAL));
979 990
980 991 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
981 992 case RANGE_OKAY:
982 993 break;
983 994 case RANGE_BADPROT:
984 995 return (set_errno(ENOTSUP));
985 996 case RANGE_BADADDR:
986 997 default:
987 998 return (set_errno(ENOMEM));
988 999 }
989 1000
990 1001 error = as_setprot(as, addr, len, uprot);
991 1002 if (error)
992 1003 return (set_errno(error));
993 1004 return (0);
994 1005 }
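A hedged usage example for the mprotect() entry point above: dropping PROT_WRITE from a page's protections makes a later store fault.

#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, (size_t)pg, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (p == MAP_FAILED)
		return (1);
	p[0] = 'x';			/* fine while the page is writable */
	if (mprotect(p, (size_t)pg, PROT_READ) == 0)
		printf("page at %p is now read-only; a store would SIGSEGV\n",
		    (void *)p);
	return (0);
}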
995 1006
996 1007 #define MC_CACHE 128 /* internal result buffer */
997 1008 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
998 1009
999 1010 int
1000 1011 mincore(caddr_t addr, size_t len, char *vecp)
1001 1012 {
1002 1013 struct as *as = curproc->p_as;
1003 1014 caddr_t ea; /* end address of loop */
1004 1015 size_t rl; /* inner result length */
1005 1016 char vec[MC_CACHE]; /* local vector cache */
1006 1017 int error;
1007 1018 model_t model;
1008 1019 long llen;
1009 1020
1010 1021 model = get_udatamodel();
1011 1022 /*
1012 1023 * Validate form of address parameters.
1013 1024 */
1014 1025 if (model == DATAMODEL_NATIVE) {
1015 1026 llen = (long)len;
1016 1027 } else {
1017 1028 llen = (int32_t)(size32_t)len;
1018 1029 }
1019 1030 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1020 1031 return (set_errno(EINVAL));
1021 1032
1022 1033 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1023 1034 return (set_errno(ENOMEM));
1024 1035
1025 1036 /*
1026 1037 * Loop over subranges of interval [addr : addr + len), recovering
1027 1038 * results internally and then copying them out to caller. Subrange
1028 1039 * is based on the size of MC_CACHE, defined above.
1029 1040 */
1030 1041 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1031 1042 error = as_incore(as, addr,
1032 1043 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1033 1044 if (rl != 0) {
1034 1045 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1035 1046 if (copyout(vec, vecp, rl) != 0)
1036 1047 return (set_errno(EFAULT));
1037 1048 vecp += rl;
1038 1049 }
1039 1050 if (error != 0)
1040 1051 return (set_errno(ENOMEM));
1041 1052 }
1042 1053 return (0);
1043 1054 }
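A hedged usage example for mincore() as implemented above: the caller gets one result byte per page, with the low bit set for resident pages, so typically only the page touched here shows up.

#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	size_t len = 8 * (size_t)pg;
	char vec[8];
	int i;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (buf == MAP_FAILED)
		return (1);
	buf[0] = 1;			/* fault in just the first page */
	if (mincore(buf, len, vec) == 0) {
		for (i = 0; i < 8; i++)
			printf("page %d: %s\n", i,
			    (vec[i] & 1) ? "resident" : "not resident");
	}
	return (0);
}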