9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
31 */
32
33 /*
34 * VM - Hardware Address Translation management for i386 and amd64
35 *
36 * Implementation of the interfaces described in <common/vm/hat.h>
37 *
38 * Nearly all the details of how the hardware is managed should not be
39 * visible outside this layer except for misc. machine specific functions
40 * that work in conjunction with this code.
41 *
42 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
43 */
44
45 #include <sys/machparam.h>
46 #include <sys/machsystm.h>
47 #include <sys/mman.h>
48 #include <sys/types.h>
49 #include <sys/systm.h>
248 uint_t r;
249 hat_kernel_range_t *rp;
250 uintptr_t va;
251 uintptr_t eva;
252 uint_t start;
253 uint_t cnt;
254 htable_t *src;
255
256 /*
257 * Once we start creating user process HATs we can enable
258 * the htable_steal() code.
259 */
260 if (can_steal_post_boot == 0)
261 can_steal_post_boot = 1;
262
263 ASSERT(AS_WRITE_HELD(as));
264 hat = kmem_cache_alloc(hat_cache, KM_SLEEP);
265 hat->hat_as = as;
266 mutex_init(&hat->hat_mutex, NULL, MUTEX_DEFAULT, NULL);
267 ASSERT(hat->hat_flags == 0);
268
269 #if defined(__xpv)
270 /*
271 * No VLP stuff on the hypervisor due to the 64-bit split top level
272 * page tables. On 32-bit it's not needed as the hypervisor takes
273 * care of copying the top level PTEs to a below 4Gig page.
274 */
275 use_vlp = 0;
276 #else /* __xpv */
/* 32-bit processes use a VLP-style hat when running with PAE */
278 #if defined(__amd64)
279 use_vlp = (ttoproc(curthread)->p_model == DATAMODEL_ILP32);
280 #elif defined(__i386)
281 use_vlp = mmu.pae_hat;
282 #endif
283 #endif /* __xpv */
284 if (use_vlp) {
285 hat->hat_flags = HAT_VLP;
286 bzero(hat->hat_vlp_ptes, VLP_SIZE);
287 }
384 mutex_exit(&hat_list_lock);
385
386 return (hat);
387 }
388
/*
 * The process has finished executing, but its address space ("as") has not
 * been cleaned up yet.  Mark the hat HAT_FREEING so htable_steal() will no
 * longer consider its pagetables.
 */
/*ARGSUSED*/
void
hat_free_start(hat_t *hat)
{
	ASSERT(AS_WRITE_HELD(hat->hat_as));

	/*
	 * If the hat is currently a stealing victim, wait for the stealing
	 * to finish.  Once we mark it as HAT_FREEING, htable_steal()
	 * won't look at its pagetables anymore.
	 */
	mutex_enter(&hat_list_lock);
	while (hat->hat_flags & HAT_VICTIM)
		cv_wait(&hat_list_cv, &hat_list_lock);
	hat->hat_flags |= HAT_FREEING;
	mutex_exit(&hat_list_lock);
}
409
410 /*
411 * An address space is being destroyed, so we destroy the associated hat.
412 */
413 void
414 hat_free_end(hat_t *hat)
415 {
416 kmem_cache_t *cache;
417
418 ASSERT(hat->hat_flags & HAT_FREEING);
419
420 /*
421 * must not be running on the given hat
422 */
423 ASSERT(CPU->cpu_current_hat != hat);
424
2445 */
2446 entry = htable_va2entry(vaddr, ht);
2447 hat_pte_unmap(ht, entry, flags, old_pte, NULL, B_FALSE);
2448 ASSERT(ht->ht_level <= mmu.max_page_level);
2449 vaddr += LEVEL_SIZE(ht->ht_level);
2450 contig_va = vaddr;
2451 ++r[r_cnt - 1].rng_cnt;
2452 }
2453 if (ht)
2454 htable_release(ht);
2455
2456 /*
2457 * handle last range for callbacks
2458 */
2459 if (r_cnt > 0)
2460 handle_ranges(hat, cb, r_cnt, r);
2461 XPV_ALLOW_MIGRATE();
2462 }
2463
2464 /*
2465 * Invalidate a virtual address translation on a slave CPU during
2466 * panic() dumps.
2467 */
2468 void
2469 hat_flush_range(hat_t *hat, caddr_t va, size_t size)
2470 {
2471 ssize_t sz;
2472 caddr_t endva = va + size;
2473
2474 while (va < endva) {
2475 sz = hat_getpagesize(hat, va);
2476 if (sz < 0) {
2477 #ifdef __xpv
2478 xen_flush_tlb();
2479 #else
2480 flush_all_tlb_entries();
2481 #endif
2482 break;
2483 }
2484 #ifdef __xpv
2485 xen_flush_va(va);
2486 #else
2487 mmu_tlbflush_entry(va);
2488 #endif
2489 va += sz;
2490 }
2491 }
2492
2493 /*
2494 * synchronize mapping with software data structures
2495 *
2496 * This interface is currently only used by the working set monitor
2497 * driver.
2498 */
2499 /*ARGSUSED*/
2500 void
2501 hat_sync(hat_t *hat, caddr_t addr, size_t len, uint_t flags)
2502 {
2503 uintptr_t vaddr = (uintptr_t)addr;
2504 uintptr_t eaddr = vaddr + len;
2505 htable_t *ht = NULL;
2506 uint_t entry;
2507 x86pte_t pte;
2508 x86pte_t save_pte;
2509 x86pte_t new;
2510 page_t *pp;
3304 * a fault when a page is made dirty. At this point
3305 * we need to remove write permission from all mappings
3306 * to this page.
3307 */
3308 hati_page_clrwrt(pp);
3309 }
3310 }
3311
3312 /*
3313 * If flag is specified, returns 0 if attribute is disabled
3314 * and non zero if enabled. If flag specifes multiple attributes
3315 * then returns 0 if ALL attributes are disabled. This is an advisory
3316 * call.
3317 */
3318 uint_t
3319 hat_page_getattr(struct page *pp, uint_t flag)
3320 {
3321 return (PP_GETRM(pp, flag));
3322 }
3323
3324
/*
 * common code used by hat_pageunload() and hment_steal()
 *
 * Invalidates the PTE at (ht, entry), updates the htable's valid-entry
 * accounting, syncs ref/mod bits back to the page_t, and removes this
 * mapping's hment from the page's mapping list.
 *
 * Entered with the page's mapping list lock held (caller did
 * x86_hm_enter()); always drops that lock (x86_hm_exit()) before
 * returning.  Returns the hment unlinked by hment_remove() for the
 * caller to free.
 */
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
{
	x86pte_t old_pte;
	pfn_t pfn = pp->p_pagenum;
	hment_t *hm;

	/*
	 * We need to acquire a hold on the htable in order to
	 * do the invalidate. We know the htable must exist, since
	 * unmaps don't release the htable until after removing any
	 * hment. Having x86_hm_enter() keeps that from proceeding.
	 */
	htable_acquire(ht);

	/*
	 * Invalidate the PTE and remove the hment.
	 */
	old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
	if (PTE2PFN(old_pte, ht->ht_level) != pfn) {
		/* the old PTE must map the pfn being unloaded */
		panic("x86pte_inval() failure found PTE = " FMT_PTE
		    " pfn being unmapped is %lx ht=0x%lx entry=0x%x",
		    old_pte, pfn, (uintptr_t)ht, entry);
	}

	/*
	 * Clean up all the htable information for this mapping
	 */
	ASSERT(ht->ht_valid_cnt > 0);
	HTABLE_DEC(ht->ht_valid_cnt);
	PGCNT_DEC(ht->ht_hat, ht->ht_level);

	/*
	 * sync ref/mod bits to the page_t, unless software marked
	 * this mapping as not needing syncs (PT_NOSYNC)
	 */
	if (PTE_GET(old_pte, PT_SOFTWARE) < PT_NOSYNC)
		hati_sync_pte_to_page(pp, old_pte, ht->ht_level);

	/*
	 * Remove the mapping list entry for this page.
	 */
	hm = hment_remove(pp, ht, entry);

	/*
	 * drop the mapping list lock so that we might free the
	 * hment and htable.
	 */
	x86_hm_exit(pp);
	htable_release(ht);
	return (hm);
}
3379
3380 extern int vpm_enable;
3381 /*
3382 * Unload all translations to a page. If the page is a subpage of a large
3383 * page, the large page mappings are also removed.
3384 *
3385 * The forceflags are unused.
3386 */
3387
3388 /*ARGSUSED*/
3389 static int
3390 hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
3391 {
3392 page_t *cur_pp = pp;
3393 hment_t *hm;
3394 hment_t *prev;
3395 htable_t *ht;
3396 uint_t entry;
|
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
30 * Copyright 2018 Joyent, Inc. All rights reserved.
31 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
32 */
33
34 /*
35 * VM - Hardware Address Translation management for i386 and amd64
36 *
37 * Implementation of the interfaces described in <common/vm/hat.h>
38 *
39 * Nearly all the details of how the hardware is managed should not be
40 * visible outside this layer except for misc. machine specific functions
41 * that work in conjunction with this code.
42 *
43 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
44 */
45
46 #include <sys/machparam.h>
47 #include <sys/machsystm.h>
48 #include <sys/mman.h>
49 #include <sys/types.h>
50 #include <sys/systm.h>
249 uint_t r;
250 hat_kernel_range_t *rp;
251 uintptr_t va;
252 uintptr_t eva;
253 uint_t start;
254 uint_t cnt;
255 htable_t *src;
256
257 /*
258 * Once we start creating user process HATs we can enable
259 * the htable_steal() code.
260 */
261 if (can_steal_post_boot == 0)
262 can_steal_post_boot = 1;
263
264 ASSERT(AS_WRITE_HELD(as));
265 hat = kmem_cache_alloc(hat_cache, KM_SLEEP);
266 hat->hat_as = as;
267 mutex_init(&hat->hat_mutex, NULL, MUTEX_DEFAULT, NULL);
268 ASSERT(hat->hat_flags == 0);
269 hat->hat_unmaps = 0;
270
271 #if defined(__xpv)
272 /*
273 * No VLP stuff on the hypervisor due to the 64-bit split top level
274 * page tables. On 32-bit it's not needed as the hypervisor takes
275 * care of copying the top level PTEs to a below 4Gig page.
276 */
277 use_vlp = 0;
278 #else /* __xpv */
/* 32-bit processes use a VLP-style hat when running with PAE */
280 #if defined(__amd64)
281 use_vlp = (ttoproc(curthread)->p_model == DATAMODEL_ILP32);
282 #elif defined(__i386)
283 use_vlp = mmu.pae_hat;
284 #endif
285 #endif /* __xpv */
286 if (use_vlp) {
287 hat->hat_flags = HAT_VLP;
288 bzero(hat->hat_vlp_ptes, VLP_SIZE);
289 }
386 mutex_exit(&hat_list_lock);
387
388 return (hat);
389 }
390
/*
 * The process has finished executing, but its address space ("as") has not
 * been cleaned up yet.  Mark the hat HAT_FREEING so htable_steal() will no
 * longer consider its pagetables.
 */
/*ARGSUSED*/
void
hat_free_start(hat_t *hat)
{
	ASSERT(AS_WRITE_HELD(hat->hat_as));

	/*
	 * If the hat is currently a stealing victim, wait for the stealing
	 * to finish.  Once we mark it as HAT_FREEING, htable_steal()
	 * won't look at its pagetables anymore.
	 *
	 * We also wait for any in-progress hati_page_unmap() calls on this
	 * hat to drain (hat_unmaps > 0), since they hold htable references
	 * that the free path must not race with.
	 */
	mutex_enter(&hat_list_lock);
	while ((hat->hat_flags & HAT_VICTIM) || (hat->hat_unmaps > 0))
		cv_wait(&hat_list_cv, &hat_list_lock);
	hat->hat_flags |= HAT_FREEING;
	mutex_exit(&hat_list_lock);
}
411
412 /*
413 * An address space is being destroyed, so we destroy the associated hat.
414 */
415 void
416 hat_free_end(hat_t *hat)
417 {
418 kmem_cache_t *cache;
419
420 ASSERT(hat->hat_flags & HAT_FREEING);
421
422 /*
423 * must not be running on the given hat
424 */
425 ASSERT(CPU->cpu_current_hat != hat);
426
2447 */
2448 entry = htable_va2entry(vaddr, ht);
2449 hat_pte_unmap(ht, entry, flags, old_pte, NULL, B_FALSE);
2450 ASSERT(ht->ht_level <= mmu.max_page_level);
2451 vaddr += LEVEL_SIZE(ht->ht_level);
2452 contig_va = vaddr;
2453 ++r[r_cnt - 1].rng_cnt;
2454 }
2455 if (ht)
2456 htable_release(ht);
2457
2458 /*
2459 * handle last range for callbacks
2460 */
2461 if (r_cnt > 0)
2462 handle_ranges(hat, cb, r_cnt, r);
2463 XPV_ALLOW_MIGRATE();
2464 }
2465
/*
 * Flush the entire TLB for the local CPU.
 * Invoked from a slave CPU during panic() dumps.
 */
void
hat_flush(void)
{
#ifdef __xpv
	xen_flush_tlb();	/* hypervisor performs the flush for us */
#else
	flush_all_tlb_entries();
#endif
}
2479
2480 /*
2481 * synchronize mapping with software data structures
2482 *
2483 * This interface is currently only used by the working set monitor
2484 * driver.
2485 */
2486 /*ARGSUSED*/
2487 void
2488 hat_sync(hat_t *hat, caddr_t addr, size_t len, uint_t flags)
2489 {
2490 uintptr_t vaddr = (uintptr_t)addr;
2491 uintptr_t eaddr = vaddr + len;
2492 htable_t *ht = NULL;
2493 uint_t entry;
2494 x86pte_t pte;
2495 x86pte_t save_pte;
2496 x86pte_t new;
2497 page_t *pp;
3291 * a fault when a page is made dirty. At this point
3292 * we need to remove write permission from all mappings
3293 * to this page.
3294 */
3295 hati_page_clrwrt(pp);
3296 }
3297 }
3298
3299 /*
3300 * If flag is specified, returns 0 if attribute is disabled
3301 * and non zero if enabled. If flag specifes multiple attributes
3302 * then returns 0 if ALL attributes are disabled. This is an advisory
3303 * call.
3304 */
3305 uint_t
3306 hat_page_getattr(struct page *pp, uint_t flag)
3307 {
3308 return (PP_GETRM(pp, flag));
3309 }
3310
/*
 * common code used by hat_pageunload() and hment_steal()
 *
 * Invalidates the PTE at (ht, entry), updates the htable's valid-entry
 * accounting, syncs ref/mod bits back to the page_t, and removes this
 * mapping's hment from the page's mapping list.
 *
 * Entered with the page's mapping list lock held (caller did
 * x86_hm_enter()); always drops that lock (x86_hm_exit()) before
 * returning.  Returns the hment unlinked by hment_remove() for the
 * caller to free, or NULL when the owning HAT was already HAT_FREEING.
 */
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
{
	x86pte_t old_pte;
	pfn_t pfn = pp->p_pagenum;
	hment_t *hm;
	hat_t *hat = ht->ht_hat;

	/*
	 * There is a race between this function and the freeing of a HAT
	 * whose owning process is exiting; process exit code ignores htable
	 * reference counts.
	 * If the HAT is already freeing (HAT_FREEING) no-op this function.
	 * Otherwise increment hat_unmaps to block the hat from being free'd
	 * until this function completes.
	 */
	mutex_enter(&hat_list_lock);
	if (hat->hat_flags & HAT_FREEING) {
		/* still must drop the mapping list lock per our contract */
		mutex_exit(&hat_list_lock);
		x86_hm_exit(pp);
		return (NULL);
	}
	++(hat->hat_unmaps);
	mutex_exit(&hat_list_lock);

	/*
	 * We need to acquire a hold on the htable in order to
	 * do the invalidate. We know the htable must exist, since
	 * unmaps don't release the htable until after removing any
	 * hment. Having x86_hm_enter() keeps that from proceeding.
	 */
	htable_acquire(ht);

	/*
	 * Invalidate the PTE and remove the hment.
	 */
	old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
	if (PTE2PFN(old_pte, ht->ht_level) != pfn) {
		/* the old PTE must map the pfn being unloaded */
		panic("x86pte_inval() failure found PTE = " FMT_PTE
		    " pfn being unmapped is %lx ht=0x%lx entry=0x%x",
		    old_pte, pfn, (uintptr_t)ht, entry);
	}

	/*
	 * Clean up all the htable information for this mapping
	 */
	ASSERT(ht->ht_valid_cnt > 0);
	HTABLE_DEC(ht->ht_valid_cnt);
	PGCNT_DEC(ht->ht_hat, ht->ht_level);

	/*
	 * sync ref/mod bits to the page_t, unless software marked
	 * this mapping as not needing syncs (PT_NOSYNC)
	 */
	if (PTE_GET(old_pte, PT_SOFTWARE) < PT_NOSYNC)
		hati_sync_pte_to_page(pp, old_pte, ht->ht_level);

	/*
	 * Remove the mapping list entry for this page.
	 */
	hm = hment_remove(pp, ht, entry);

	/*
	 * drop the mapping list lock so that we might free the hment and htable
	 */
	x86_hm_exit(pp);
	htable_release(ht);

	/*
	 * Unblock hat_free_start(): it may be waiting in cv_wait() for
	 * hat_unmaps to reach zero.
	 */
	mutex_enter(&hat_list_lock);
	--(hat->hat_unmaps);
	cv_broadcast(&hat_list_cv);
	mutex_exit(&hat_list_lock);
	return (hm);
}
3387
3388 extern int vpm_enable;
3389 /*
3390 * Unload all translations to a page. If the page is a subpage of a large
3391 * page, the large page mappings are also removed.
3392 *
3393 * The forceflags are unused.
3394 */
3395
3396 /*ARGSUSED*/
3397 static int
3398 hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
3399 {
3400 page_t *cur_pp = pp;
3401 hment_t *hm;
3402 hment_t *prev;
3403 htable_t *ht;
3404 uint_t entry;
|