--- old/usr/src/uts/common/vm/seg_umap.c
+++ new/usr/src/uts/common/vm/seg_umap.c
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
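
/*
 * Typical use, sketched here for reference: a kernel consumer (such as the
 * comm page code) picks a page-aligned, segkmem-backed kernel address and
 * hands segumap_create() to as_map() to establish the user mapping.  The
 * names below are illustrative, assuming 'as' is the target process address
 * space and 'uaddr' was chosen beforehand (e.g. via map_addr()):
 *
 *	segumap_crargs_t args;
 *
 *	args.kaddr = kaddr;			(page-aligned segkmem address)
 *	args.prot = PROT_READ | PROT_USER;	(PROT_USER is required)
 *	error = as_map(as, uaddr, len, segumap_create, &args);
 */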

#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_umap.h>

static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);

static struct seg_ops segumap_ops = {
	segumap_dup,
	segumap_unmap,
	segumap_free,
	segumap_fault,
	segumap_faulta,
	segumap_setprot,
	segumap_checkprot,
	NULL,			/* kluster: disabled */
	NULL,			/* swapout: disabled */
	segumap_sync,
	segumap_incore,
	segumap_lockop,
	segumap_getprot,
	segumap_getoffset,
	segumap_gettype,
	segumap_getvp,
	segumap_advise,
	segumap_dump,
	segumap_pagelock,
	segumap_setpagesize,
	segumap_getmemid,
	NULL,			/* getpolicy: disabled */
	segumap_capable,
	seg_inherit_notsup
};

/*
 * Create a kernel/user-mapped segment.
 */
int
segumap_create(struct seg *seg, void *argsp)
{
	segumap_crargs_t *a = (struct segumap_crargs *)argsp;
	segumap_data_t *data;

	ASSERT((uintptr_t)a->kaddr > _userlimit);

	/*
	 * Check several aspects of the mapping request to ensure validity:
	 * - kernel pages must reside entirely in kernel space
	 * - the mapping must not wrap around the address space
	 * - target protection must be user-accessible
	 * - kernel address must be page-aligned
	 * - kernel address must reside inside a "safe" segment
	 */
	if ((uintptr_t)a->kaddr <= _userlimit ||
	    ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
	    (a->prot & PROT_USER) == 0 ||
	    ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
	    !segumap_verify_safe(a->kaddr, seg->s_size)) {
		return (EINVAL);
	}

	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
	data->sud_kaddr = a->kaddr;
	data->sud_prot = a->prot;

	seg->s_ops = &segumap_ops;
	seg->s_data = data;
	return (0);
}

static boolean_t
segumap_verify_safe(caddr_t kaddr, size_t len)
{
	struct seg *seg;

	/*
	 * Presently, only pages which are backed by segkmem are allowed to be
	 * shared with userspace. This prevents nasty paging behavior with
	 * other drivers such as seg_kp. Furthermore, the backing kernel
	 * segment must completely contain the region to be mapped.
	 *
	 * Failing these checks is fatal for now since such mappings are done
	 * in a very limited context from the kernel.
	 */
	AS_LOCK_ENTER(&kas, RW_READER);
	seg = as_segat(&kas, kaddr);
	VERIFY(seg != NULL);
	VERIFY(seg->s_base + seg->s_size >= kaddr + len);
	VERIFY(seg->s_ops == &segkmem_ops);
	AS_LOCK_EXIT(&kas);

	return (B_TRUE);
}

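/*
 * Duplicate this segment into a new address space (e.g. during fork),
 * giving the copy its own segumap_data_t but the same kernel backing
 * address and protection.
 */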
static int
segumap_dup(struct seg *seg, struct seg *newseg)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	segumap_data_t *newsud;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
	rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
	newsud->sud_kaddr = sud->sud_kaddr;
	newsud->sud_prot = sud->sud_prot;

	newseg->s_ops = seg->s_ops;
	newseg->s_data = newsud;
	return (0);
}

static int
segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Only allow unmap of entire segment */
	if (addr != seg->s_base || len != seg->s_size) {
		return (EINVAL);
	}
	if (sud->sud_softlockcnt != 0) {
		return (EAGAIN);
	}

	/*
	 * Unconditionally unload the entire segment range.
	 */
	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

	seg_free(seg);
	return (0);
}

static void
segumap_free(struct seg *seg)
{
	segumap_data_t *data = (segumap_data_t *)seg->s_data;

	ASSERT(data != NULL);

	rw_destroy(&data->sud_lock);
	VERIFY(data->sud_softlockcnt == 0);
	kmem_free(data, sizeof (*data));
	seg->s_data = NULL;
}

/* ARGSUSED */
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw tw)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		rw_enter(&sud->sud_lock, RW_WRITER);
		VERIFY(sud->sud_softlockcnt >= plen);
		sud->sud_softlockcnt -= plen;
		rw_exit(&sud->sud_lock);
		return (0);
	}

	ASSERT(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&sud->sud_lock, RW_WRITER);

	if (type == F_INVAL ||
	    (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
		/*
		 * Load the (entire) segment into the HAT.
		 *
		 * It's possible that threads racing into as_fault will cause
		 * seg_umap to load the same range multiple times in quick
		 * succession. Redundant hat_devload operations are safe.
		 */
		for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
			pfn_t pfn;

			pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
			VERIFY(pfn != PFN_INVALID);
			hat_devload(seg->s_as->a_hat, seg->s_base + i,
			    PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
		}
	}
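	/*
	 * For F_SOFTLOCK, account for the pages being locked.  If adding
	 * btop(len) wrapped the counter (the new value is not greater than
	 * the old one), fail rather than corrupt the count.
	 */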
	if (type == F_SOFTLOCK) {
		size_t nval = sud->sud_softlockcnt + btop(len);

		if (sud->sud_softlockcnt >= nval) {
			rw_exit(&sud->sud_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		sud->sud_softlockcnt = nval;
	}

	rw_exit(&sud->sud_lock);
	return (0);
}

/* ARGSUSED */
static faultcode_t
segumap_faulta(struct seg *seg, caddr_t addr)
{
	/* Do nothing: an async pagefault should not load a translation. */
	return (0);
}

/* ARGSUSED */
static int
segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * The seg_umap driver does not yet allow protection to be changed.
	 */
	return (EACCES);
}

/* ARGSUSED */
static int
segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	if ((sud->sud_prot & prot) != prot) {
		error = EACCES;
	}
	rw_exit(&sud->sud_lock);
	return (error);
}

/* ARGSUSED */
static int
segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	/* Always succeed since there is no backing store to sync. */
	return (0);
}

/* ARGSUSED */
static size_t
segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t sz = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

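	/*
	 * The kernel pages backing this segment are always resident, so
	 * report every page in the (page-aligned) range as in core.
	 */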
	len = (len + PAGEOFFSET) & PAGEMASK;
	while (len > 0) {
		*vec = 1;
		sz += PAGESIZE;
		vec++;
		len -= PAGESIZE;
	}
	return (sz);
}

/* ARGSUSED */
static int
segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	/* Report success since kernel pages are always in memory. */
	return (0);
}

static int
segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	size_t pgno;
	uint_t prot;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	prot = sud->sud_prot;
	rw_exit(&sud->sud_lock);

	/*
	 * Reporting protection is simple since it is not tracked per-page.
	 */
	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	while (pgno > 0) {
		protv[--pgno] = prot;
	}
	return (0);
}

/* ARGSUSED */
static u_offset_t
segumap_getoffset(struct seg *seg, caddr_t addr)
{
	/*
	 * To avoid leaking information about the layout of the kernel address
	 * space, always report '0' as the offset.
	 */
	return (0);
}

/* ARGSUSED */
static int
segumap_gettype(struct seg *seg, caddr_t addr)
{
	/*
	 * Since already-existing kernel pages are being mapped into userspace,
	 * always report the segment type as shared.
	 */
	return (MAP_SHARED);
}

/* ARGSUSED */
static int
segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	*vpp = NULL;
	return (0);
}

/* ARGSUSED */
static int
segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	if (behav == MADV_PURGE) {
		/* Purge does not make sense for this mapping */
		return (EINVAL);
	}
	/* Indicate success for everything else. */
	return (0);
}

/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}

/* ARGSUSED */
static int
segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/* ARGSUSED */
static int
segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

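/*
 * Produce the unique memory ID for this mapping from the backing kernel
 * address and the offset of 'addr' within the segment.
 */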
static int
segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	memidp->val[0] = (uintptr_t)sud->sud_kaddr;
	memidp->val[1] = (uintptr_t)(addr - seg->s_base);
	return (0);
}

/* ARGSUSED */
static int
segumap_capable(struct seg *seg, segcapability_t capability)
{
	/* no special capabilities */
	return (0);
}