OS-881 To work around OS-580, add support to invalidate mappings from only a single process
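This change extends the MC_SYNC flag validation in memcntl() so that, in addition to MS_INVALIDATE (invalidate cached mappings for all processes mapping the object), a caller may pass the new MS_INVALCURPROC flag to invalidate mappings in the calling process only; the two flags are mutually exclusive, just as MS_SYNC and MS_ASYNC are. A minimal usage sketch, assuming MS_INVALCURPROC is exported to userland through <sys/mman.h>:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

/*
 * Sketch only: request that cached mappings for [addr, addr + len) be
 * invalidated in the calling process alone.  MS_INVALCURPROC is the flag
 * added by this change; combining it with MS_INVALIDATE is rejected with
 * EINVAL by the validation added in the MC_SYNC case below.
 */
static int
invalidate_own_mappings(caddr_t addr, size_t len)
{
	if (memcntl(addr, len, MC_SYNC,
	    (caddr_t)(uintptr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0) == -1) {
		perror("memcntl(MC_SYNC)");
		return (-1);
	}
	return (0);
}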
--- old/usr/src/uts/common/syscall/memcntl.c
+++ new/usr/src/uts/common/syscall/memcntl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright (c) 2015 Joyent, Inc.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30
31 31 #include <sys/types.h>
32 32 #include <sys/bitmap.h>
33 33 #include <sys/sysmacros.h>
34 34 #include <sys/kmem.h>
35 35 #include <sys/param.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/user.h>
38 38 #include <sys/unistd.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/proc.h>
41 41 #include <sys/mman.h>
42 42 #include <sys/tuneable.h>
43 43 #include <sys/cmn_err.h>
44 44 #include <sys/cred.h>
45 45 #include <sys/vmsystm.h>
46 46 #include <sys/debug.h>
47 47 #include <sys/policy.h>
48 48
49 49 #include <vm/as.h>
50 50 #include <vm/seg.h>
51 51
52 52 static uint_t mem_getpgszc(size_t);
53 53
54 54 /*
55 55 * Memory control operations
56 56 */
57 57 int
58 58 memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
59 59 {
60 60 struct as *as = ttoproc(curthread)->p_as;
61 61 struct proc *p = ttoproc(curthread);
62 62 size_t pgsz;
63 63 uint_t szc, oszc, pgcmd;
64 64 int error = 0;
65 65 faultcode_t fc;
66 66 uintptr_t iarg;
67 67 STRUCT_DECL(memcntl_mha, mha);
68 68
69 69 if (mask)
70 70 return (set_errno(EINVAL));
71 71 if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
72 72 if ((addr != 0) || (len != 0)) {
73 73 return (set_errno(EINVAL));
74 74 }
75 75 } else if (cmd != MC_HAT_ADVISE) {
76 76 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
77 77 return (set_errno(EINVAL));
78 78 }
79 79 /*
80 80 * We're only concerned with the address range
81 81 * here, not the protections. The protections
82 82 * are only used as a "filter" in this code,
83 83 * they aren't set or modified here.
84 84 */
85 85 if (valid_usr_range(addr, len, 0, as,
86 86 as->a_userlimit) != RANGE_OKAY) {
87 87 return (set_errno(ENOMEM));
88 88 }
89 89 }
90 90
91 91 if (cmd == MC_HAT_ADVISE) {
92 92 if (attr != 0 || mask != 0) {
93 93 return (set_errno(EINVAL));
94 94 }
95 95
96 96 } else {
97 97 if ((VALID_ATTR & attr) != attr) {
98 98 return (set_errno(EINVAL));
99 99 }
100 100 if ((attr & SHARED) && (attr & PRIVATE)) {
101 101 return (set_errno(EINVAL));
102 102 }
103 103 if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
104 104 (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
105 105 (error = secpolicy_lock_memory(CRED())) != 0)
106 106 return (set_errno(error));
107 107 }
108 108 if (attr) {
109 109 attr |= PROT_USER;
110 110 }
111 111
112 112 switch (cmd) {
113 113 case MC_SYNC:
114 114 /*
115 115 * MS_SYNC used to be defined to be zero but is now non-zero.
116 116 * For binary compatibility we still accept zero
117 117 * (the absence of MS_ASYNC) to mean the same thing.
118 + * Binary compatibility is not an issue for MS_INVALCURPROC.
118 119 */
119 120 iarg = (uintptr_t)arg;
120 121 if ((iarg & ~MS_INVALIDATE) == 0)
121 122 iarg |= MS_SYNC;
122 123
123 - if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
124 - ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
124 + if (((iarg &
125 + ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
126 + ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
127 + ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
128 + (MS_INVALIDATE|MS_INVALCURPROC))) {
125 129 error = set_errno(EINVAL);
126 130 } else {
127 131 error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
128 132 if (error) {
129 133 (void) set_errno(error);
130 134 }
131 135 }
132 136 return (error);
133 137 case MC_LOCKAS:
134 138 if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
135 139 (uintptr_t)arg == 0) {
136 140 return (set_errno(EINVAL));
137 141 }
138 142 break;
139 143 case MC_LOCK:
140 144 case MC_UNLOCKAS:
141 145 case MC_UNLOCK:
142 146 break;
143 147 case MC_HAT_ADVISE:
144 148 /*
145 149 * Set prefered page size.
146 150 */
147 151 STRUCT_INIT(mha, get_udatamodel());
148 152 if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
149 153 return (set_errno(EFAULT));
150 154 }
151 155
152 156 pgcmd = STRUCT_FGET(mha, mha_cmd);
153 157
154 158 /*
155 159 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
156 160 * and MHA_MAPSIZE_BSSBRK are supported. Only one
157 161 * command may be specified at a time.
158 162 */
159 163 if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
160 164 pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
161 165 STRUCT_FGET(mha, mha_flags))
162 166 return (set_errno(EINVAL));
163 167
164 168 pgsz = STRUCT_FGET(mha, mha_pagesize);
165 169
166 170 /*
167 171 * call platform specific map_pgsz() routine to get the
168 172 * optimal pgsz if pgsz is 0.
169 173 *
170 174 * For stack and heap operations addr and len must be zero.
171 175 */
172 176 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
173 177 if (addr != NULL || len != 0) {
174 178 return (set_errno(EINVAL));
175 179 }
176 180
177 181 /*
178 182 * Disable autompss for this process unless pgsz == 0,
179 183 * which means the system should pick. In the
180 184 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
181 185 * we don't want to enable it when someone has
182 186 * disabled automatic large page selection for the
183 187 * whole system.
184 188 */
185 189 mutex_enter(&p->p_lock);
186 190 if (pgsz != 0) {
187 191 p->p_flag &= ~SAUTOLPG;
188 192 }
189 193 mutex_exit(&p->p_lock);
190 194
191 195 as_rangelock(as);
192 196
193 197 if (pgsz == 0) {
194 198 int type;
195 199
196 200 if (pgcmd == MHA_MAPSIZE_BSSBRK)
197 201 type = MAPPGSZ_HEAP;
198 202 else
199 203 type = MAPPGSZ_STK;
200 204
201 205 pgsz = map_pgsz(type, p, 0, 0, 1);
202 206 }
203 207 } else {
204 208 /*
205 209 * addr and len must be valid for range specified.
206 210 */
207 211 if (valid_usr_range(addr, len, 0, as,
208 212 as->a_userlimit) != RANGE_OKAY) {
209 213 return (set_errno(ENOMEM));
210 214 }
211 215 /*
212 216 * Note that we don't disable automatic large page
213 217 * selection for anon segments based on use of
214 218 * memcntl().
215 219 */
216 220 if (pgsz == 0) {
217 221 error = as_set_default_lpsize(as, addr, len);
218 222 if (error) {
219 223 (void) set_errno(error);
220 224 }
221 225 return (error);
222 226 }
223 227
224 228 /*
225 229 * addr and len must be prefered page size aligned
226 230 */
227 231 if (!IS_P2ALIGNED(addr, pgsz) ||
228 232 !IS_P2ALIGNED(len, pgsz)) {
229 233 return (set_errno(EINVAL));
230 234 }
231 235 }
232 236
233 237 szc = mem_getpgszc(pgsz);
234 238 if (szc == (uint_t)-1) {
235 239 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
236 240 != 0) {
237 241 as_rangeunlock(as);
238 242 }
239 243 return (set_errno(EINVAL));
240 244 }
241 245
242 246 /*
243 247 * For stack and heap operations we first need to pad
244 248 * out existing range (create new mappings) to the new
245 249 		 * preferred page size boundary. Also the start of the
246 250 		 * .bss for the heap or user's stack base may not be on
247 251 		 * the new preferred page size boundary. For these cases
248 252 		 * we align the base of the request on the new preferred
249 253 * page size.
250 254 */
251 255 if (pgcmd & MHA_MAPSIZE_BSSBRK) {
252 256 if (szc == p->p_brkpageszc) {
253 257 as_rangeunlock(as);
254 258 return (0);
255 259 }
256 260 if (szc > p->p_brkpageszc) {
257 261 error = brk_internal(p->p_brkbase
258 262 + p->p_brksize, szc);
259 263 if (error) {
260 264 as_rangeunlock(as);
261 265 return (set_errno(error));
262 266 }
263 267 }
264 268 /*
265 269 * It is possible for brk_internal to silently fail to
266 270 * promote the heap size, so don't panic or ASSERT.
267 271 */
268 272 if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
269 273 as_rangeunlock(as);
270 274 return (set_errno(ENOMEM));
271 275 }
272 276 oszc = p->p_brkpageszc;
273 277 p->p_brkpageszc = szc;
274 278
275 279 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
276 280 pgsz);
277 281 len = (p->p_brkbase + p->p_brksize) - addr;
278 282 ASSERT(IS_P2ALIGNED(len, pgsz));
279 283 /*
280 284 * Perhaps no existing pages to promote.
281 285 */
282 286 if (len == 0) {
283 287 as_rangeunlock(as);
284 288 return (0);
285 289 }
286 290 }
287 291 /*
288 292 * The code below, as does grow.c, assumes stacks always grow
289 293 * downward.
290 294 */
291 295 if (pgcmd & MHA_MAPSIZE_STACK) {
292 296 if (szc == p->p_stkpageszc) {
293 297 as_rangeunlock(as);
294 298 return (0);
295 299 }
296 300
297 301 if (szc > p->p_stkpageszc) {
298 302 error = grow_internal(p->p_usrstack -
299 303 p->p_stksize, szc);
300 304 if (error) {
301 305 as_rangeunlock(as);
302 306 return (set_errno(error));
303 307 }
304 308 }
305 309 /*
306 310 * It is possible for grow_internal to silently fail to
307 311 * promote the stack size, so don't panic or ASSERT.
308 312 */
309 313 if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
310 314 as_rangeunlock(as);
311 315 return (set_errno(ENOMEM));
312 316 }
313 317 oszc = p->p_stkpageszc;
314 318 p->p_stkpageszc = szc;
315 319
316 320 addr = p->p_usrstack - p->p_stksize;
317 321 len = P2ALIGN(p->p_stksize, pgsz);
318 322
319 323 /*
320 324 * Perhaps nothing to promote.
321 325 */
322 326 if (len == 0 || addr >= p->p_usrstack ||
323 327 (addr + len) < addr) {
324 328 as_rangeunlock(as);
325 329 return (0);
326 330 }
327 331 }
328 332 ASSERT(IS_P2ALIGNED(addr, pgsz));
329 333 ASSERT(IS_P2ALIGNED(len, pgsz));
330 334 error = as_setpagesize(as, addr, len, szc, B_TRUE);
331 335
332 336 /*
333 337 * On stack or heap failures restore original
334 338 * pg size code.
335 339 */
336 340 if (error) {
337 341 if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
338 342 p->p_brkpageszc = oszc;
339 343 }
340 344 if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
341 345 p->p_stkpageszc = oszc;
342 346 }
343 347 (void) set_errno(error);
344 348 }
345 349 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
346 350 as_rangeunlock(as);
347 351 }
348 352 return (error);
349 353 case MC_ADVISE:
350 354 if ((uintptr_t)arg == MADV_FREE ||
351 355 (uintptr_t)arg == MADV_PURGE) {
352 356 len &= PAGEMASK;
353 357 }
354 358 switch ((uintptr_t)arg) {
355 359 case MADV_WILLNEED:
356 360 fc = as_faulta(as, addr, len);
357 361 if (fc) {
358 362 if (FC_CODE(fc) == FC_OBJERR)
359 363 error = set_errno(FC_ERRNO(fc));
360 364 else if (FC_CODE(fc) == FC_NOMAP)
361 365 error = set_errno(ENOMEM);
362 366 else
363 367 error = set_errno(EINVAL);
364 368 return (error);
365 369 }
366 370 break;
367 371
368 372 case MADV_DONTNEED:
369 373 /*
370 374 * For now, don't need is turned into an as_ctl(MC_SYNC)
371 375 * operation flagged for async invalidate.
372 376 */
373 377 error = as_ctl(as, addr, len, MC_SYNC, attr,
374 378 MS_ASYNC | MS_INVALIDATE, NULL, 0);
375 379 if (error)
376 380 (void) set_errno(error);
377 381 return (error);
378 382
379 383 default:
380 384 error = as_ctl(as, addr, len, cmd, attr,
381 385 (uintptr_t)arg, NULL, 0);
382 386 if (error)
383 387 (void) set_errno(error);
384 388 return (error);
385 389 }
386 390 break;
387 391 case MC_INHERIT_ZERO:
388 392 if (arg != 0 || attr != 0 || mask != 0)
389 393 return (set_errno(EINVAL));
390 394 break;
391 395 default:
392 396 return (set_errno(EINVAL));
393 397 }
394 398
395 399 error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);
396 400
397 401 if (error)
398 402 (void) set_errno(error);
399 403 return (error);
400 404 }
401 405
402 406 /*
403 407 * Return page size code for page size passed in. If
404 408 * matching page size not found or supported, return -1.
405 409 */
406 410 static uint_t
407 411 mem_getpgszc(size_t pgsz) {
408 412 return ((uint_t)page_szc_user_filtered(pgsz));
409 413 }
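For context on the MC_HAT_ADVISE path above: the advice is passed in a struct memcntl_mha copied in from userland, and for heap or stack advice addr and len must be zero, mha_flags must be zero, and only one MHA_MAPSIZE_* command may be set. A minimal sketch, assuming the caller supplies a page size the platform supports (getpagesizes(3C) can report the available sizes), or 0 to let the system choose:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

/*
 * Sketch only: advise the kernel to prefer larger pages for the heap.
 * Per the checks in memcntl() above, addr and len must be 0 for
 * MHA_MAPSIZE_BSSBRK, mha_flags must be 0, and attr/mask must be 0.
 * A pagesize of 0 lets the system pick via map_pgsz().
 */
static int
prefer_large_heap_pages(size_t pgsz)
{
	struct memcntl_mha mha;

	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
	mha.mha_flags = 0;
	mha.mha_pagesize = pgsz;	/* must be a supported size, or 0 */

	if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) == -1) {
		perror("memcntl(MC_HAT_ADVISE)");
		return (-1);
	}
	return (0);
}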