11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
31 */
32
33 /*
34 * VM - Hardware Address Translation management for i386 and amd64
35 *
36 * Implementation of the interfaces described in <common/vm/hat.h>
37 *
38 * Nearly all the details of how the hardware is managed should not be
39 * visible outside this layer except for misc. machine specific functions
40 * that work in conjunction with this code.
41 *
42 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
43 */
44
45 #include <sys/machparam.h>
46 #include <sys/machsystm.h>
47 #include <sys/mman.h>
48 #include <sys/types.h>
49 #include <sys/systm.h>
50 #include <sys/cpuvar.h>
3306 * to this page.
3307 */
3308 hati_page_clrwrt(pp);
3309 }
3310 }
3311
/*
 * Query page attribute bits of pp (via PP_GETRM).
 *
 * If flag is specified, returns 0 if the attribute is disabled
 * and non-zero if enabled. If flag specifies multiple attributes
 * then returns 0 if ALL attributes are disabled. This is an advisory
 * call: no lock is taken, so the result may be stale by the time the
 * caller examines it.
 */
uint_t
hat_page_getattr(struct page *pp, uint_t flag)
{
	return (PP_GETRM(pp, flag));
}
3323
3324
/*
 * common code used by hat_pageunload() and hment_steal()
 *
 * Invalidates the PTE at (ht, entry) that maps page pp, syncs PTE state
 * back to the page (unless marked PT_NOSYNC), and removes the matching
 * hment from pp's mapping list.
 *
 * Entered with pp's mapping list lock held (x86_hm_enter()); the lock is
 * dropped via x86_hm_exit() before returning. Returns the removed hment
 * for the caller to free (may be NULL).
 */
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
{
	x86pte_t old_pte;
	pfn_t pfn = pp->p_pagenum;
	hment_t *hm;

	/*
	 * We need to acquire a hold on the htable in order to
	 * do the invalidate. We know the htable must exist, since
	 * unmap's don't release the htable until after removing any
	 * hment. Having x86_hm_enter() keeps that from proceeding.
	 */
	htable_acquire(ht);

	/*
	 * Invalidate the PTE and remove the hment.
	 */
	old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
 */
	/*
	 * Unless the mapping was marked to skip syncing (PT_NOSYNC),
	 * carry the state of the old PTE over to the page.
	 */
	if (PTE_GET(old_pte, PT_SOFTWARE) < PT_NOSYNC)
		hati_sync_pte_to_page(pp, old_pte, ht->ht_level);

	/*
	 * Remove the mapping list entry for this page.
	 */
	hm = hment_remove(pp, ht, entry);

	/*
	 * drop the mapping list lock so that we might free the
	 * hment and htable.
	 */
	x86_hm_exit(pp);
	htable_release(ht);
	return (hm);
}
3379
3380 extern int vpm_enable;
/*
 * Unload all translations to a page. If the page is a subpage of a large
 * page, the large page mappings are also removed.
 *
 * The forceflags are unused.
 */

/*ARGSUSED*/
static int
hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
{
	page_t *cur_pp = pp;	/* current page; becomes group leader for larger sizes */
	hment_t *hm;
	hment_t *prev;
	htable_t *ht;
	uint_t entry;
	level_t level;

	XPV_DISALLOW_MIGRATE();

	/*
	 * prevent recursion due to kmem_free()
	 */
	++curthread->t_hatdepth;
	ASSERT(curthread->t_hatdepth < 16);

#if defined(__amd64)
	/*
	 * clear the vpm ref.
	 */
	if (vpm_enable) {
		pp->p_vpmref = 0;
	}
#endif
	/*
	 * The loop with next_size handles pages with multiple pagesize mappings
	 */
next_size:
	for (;;) {

		/*
		 * Get a mapping list entry
		 */
		x86_hm_enter(cur_pp);
		for (prev = NULL; ; prev = hm) {
			hm = hment_walk(cur_pp, &ht, &entry, prev);
			if (hm == NULL) {
				x86_hm_exit(cur_pp);

				/*
				 * If not part of a larger page, we're done.
				 */
				if (cur_pp->p_szc <= pg_szcd) {
					ASSERT(curthread->t_hatdepth > 0);
					--curthread->t_hatdepth;
					XPV_ALLOW_MIGRATE();
					return (0);
				}

				/*
				 * Else check the next larger page size.
				 * hat_page_demote() may decrease p_szc
				 * but that's ok: we'll just take an extra
				 * trip, discover there are no larger
				 * mappings, and return.
				 */
				++pg_szcd;
				cur_pp = PP_GROUPLEADER(cur_pp, pg_szcd);
				goto next_size;
			}

			/*
			 * If this mapping size matches, remove it.
			 */
			level = ht->ht_level;
			if (level == pg_szcd)
				break;
		}

		/*
		 * Remove the mapping list entry for this page.
		 * Note this does the x86_hm_exit() for us.
		 */
		hm = hati_page_unmap(cur_pp, ht, entry);
		if (hm != NULL)
			hment_free(hm);
	}
}
3469
3470 int
3471 hat_pageunload(struct page *pp, uint_t forceflag)
3472 {
3473 ASSERT(PAGE_EXCL(pp));
3474 return (hati_pageunload(pp, 0, forceflag));
3475 }
3476
3477 /*
3478 * Unload all large mappings to pp and reduce by 1 p_szc field of every large
3479 * page level that included pp.
3480 *
3481 * pp must be locked EXCL. Even though no other constituent pages are locked
3482 * it's legal to unload large mappings to pp because all constituent pages of
 * large locked mappings have to be locked SHARED. Therefore if we have EXCL
3484 * lock on one of constituent pages none of the large mappings to pp are
3485 * locked.
3486 *
3487 * Change (always decrease) p_szc field starting from the last constituent
3488 * page and ending with root constituent page so that root's pszc always shows
3489 * the area where hat_page_demote() may be active.
3490 *
3491 * This mechanism is only used for file system pages where it's not always
3492 * possible to get EXCL locks on all constituent pages to demote the size code
3493 * (as is done for anonymous or kernel large pages).
3494 */
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
31 * Copyright 2014 Joyent, Inc. All rights reserved.
32 */
33
34 /*
35 * VM - Hardware Address Translation management for i386 and amd64
36 *
37 * Implementation of the interfaces described in <common/vm/hat.h>
38 *
39 * Nearly all the details of how the hardware is managed should not be
40 * visible outside this layer except for misc. machine specific functions
41 * that work in conjunction with this code.
42 *
43 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
44 */
45
46 #include <sys/machparam.h>
47 #include <sys/machsystm.h>
48 #include <sys/mman.h>
49 #include <sys/types.h>
50 #include <sys/systm.h>
51 #include <sys/cpuvar.h>
3307 * to this page.
3308 */
3309 hati_page_clrwrt(pp);
3310 }
3311 }
3312
/*
 * Query page attribute bits of pp (via PP_GETRM).
 *
 * If flag is specified, returns 0 if the attribute is disabled
 * and non-zero if enabled. If flag specifies multiple attributes
 * then returns 0 if ALL attributes are disabled. This is an advisory
 * call: no lock is taken, so the result may be stale by the time the
 * caller examines it.
 */
uint_t
hat_page_getattr(struct page *pp, uint_t flag)
{
	return (PP_GETRM(pp, flag));
}
3324
3325
/*
 * common code used by hat_page_inval() and hment_steal()
 *
 * Invalidates the PTE at (ht, entry) that maps page pp, syncs PTE state
 * back to the page (unless marked PT_NOSYNC), and removes the matching
 * hment from pp's mapping list.
 *
 * Entered with pp's mapping list lock held (x86_hm_enter()); the lock is
 * dropped via x86_hm_exit() before returning. Returns the removed hment
 * for the caller to free (may be NULL).
 */
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
{
	x86pte_t old_pte;
	pfn_t pfn = pp->p_pagenum;
	hment_t *hm;

	/*
	 * We need to acquire a hold on the htable in order to
	 * do the invalidate. We know the htable must exist, since
	 * unmap's don't release the htable until after removing any
	 * hment. Having x86_hm_enter() keeps that from proceeding.
	 */
	htable_acquire(ht);

	/*
	 * Invalidate the PTE and remove the hment.
	 */
	old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
 */
	/*
	 * Unless the mapping was marked to skip syncing (PT_NOSYNC),
	 * carry the state of the old PTE over to the page.
	 */
	if (PTE_GET(old_pte, PT_SOFTWARE) < PT_NOSYNC)
		hati_sync_pte_to_page(pp, old_pte, ht->ht_level);

	/*
	 * Remove the mapping list entry for this page.
	 */
	hm = hment_remove(pp, ht, entry);

	/*
	 * drop the mapping list lock so that we might free the
	 * hment and htable.
	 */
	x86_hm_exit(pp);
	htable_release(ht);
	return (hm);
}
3380
3381 extern int vpm_enable;
/*
 * Unload translations to a page. If the page is a subpage of a large
 * page, the large page mappings are also removed.
 * If curhat is not NULL, then we only unload the translation
 * for the given process, otherwise all translations are unloaded.
 */
void
hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
{
	page_t *cur_pp = pp;	/* current page; becomes group leader for larger sizes */
	hment_t *hm;
	hment_t *prev;
	htable_t *ht;
	uint_t entry;
	level_t level;
	ulong_t cnt;		/* safety bound when unloading a single hat */

	XPV_DISALLOW_MIGRATE();

#if defined(__amd64)
	/*
	 * clear the vpm ref.
	 */
	if (vpm_enable) {
		pp->p_vpmref = 0;
	}
#endif
	/*
	 * The loop with next_size handles pages with multiple pagesize mappings
	 */
next_size:
	/*
	 * When targeting a single hat, bound the walk by the page's share
	 * count so a mapping list full of other hats' entries can't spin us
	 * forever.
	 */
	if (curhat != NULL)
		cnt = hat_page_getshare(cur_pp);
	for (;;) {

		/*
		 * Get a mapping list entry
		 */
		x86_hm_enter(cur_pp);
		for (prev = NULL; ; prev = hm) {
			hm = hment_walk(cur_pp, &ht, &entry, prev);
			if (hm == NULL) {
				x86_hm_exit(cur_pp);

curproc_done:
				/*
				 * If not part of a larger page, we're done.
				 */
				if (cur_pp->p_szc <= pg_szcd) {
					XPV_ALLOW_MIGRATE();
					return;
				}

				/*
				 * Else check the next larger page size.
				 * hat_page_demote() may decrease p_szc
				 * but that's ok: we'll just take an extra
				 * trip, discover there are no larger
				 * mappings, and return.
				 */
				++pg_szcd;
				cur_pp = PP_GROUPLEADER(cur_pp, pg_szcd);
				goto next_size;
			}

			/*
			 * If this mapping size matches, remove it.
			 */
			level = ht->ht_level;
			if (level == pg_szcd) {
				if (curhat == NULL || ht->ht_hat == curhat)
					break;
				/*
				 * Unloading only the given process but it's
				 * not the hat for the current process. Leave
				 * entry in place. Also do a safety check to
				 * ensure we don't get in an infinite loop
				 */
				if (cnt-- == 0) {
					x86_hm_exit(cur_pp);
					goto curproc_done;
				}
			}
		}

		/*
		 * Remove the mapping list entry for this page.
		 * Note this does the x86_hm_exit() for us.
		 */
		hm = hati_page_unmap(cur_pp, ht, entry);
		if (hm != NULL)
			hment_free(hm);

		/* Perform check above for being part of a larger page. */
		if (curhat != NULL)
			goto curproc_done;
	}
}
3480
3481 /*
3482 * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
3483 * we only unload the translation for the current process, otherwise all
3484 * translations are unloaded.
3485 */
3486 static int
3487 hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
3488 {
3489 struct hat *curhat = NULL;
3490
3491 /*
3492 * prevent recursion due to kmem_free()
3493 */
3494 ++curthread->t_hatdepth;
3495 ASSERT(curthread->t_hatdepth < 16);
3496
3497 if (unloadflag == HAT_CURPROC_PGUNLOAD)
3498 curhat = curthread->t_procp->p_as->a_hat;
3499
3500 hat_page_inval(pp, pg_szcd, curhat);
3501
3502 ASSERT(curthread->t_hatdepth > 0);
3503 --curthread->t_hatdepth;
3504 return (0);
3505 }
3506
3507 int
3508 hat_pageunload(struct page *pp, uint_t unloadflag)
3509 {
3510 ASSERT(PAGE_EXCL(pp));
3511 return (hati_pageunload(pp, 0, unloadflag));
3512 }
3513
3514 /*
3515 * Unload all large mappings to pp and reduce by 1 p_szc field of every large
3516 * page level that included pp.
3517 *
3518 * pp must be locked EXCL. Even though no other constituent pages are locked
3519 * it's legal to unload large mappings to pp because all constituent pages of
 * large locked mappings have to be locked SHARED. Therefore if we have EXCL
3521 * lock on one of constituent pages none of the large mappings to pp are
3522 * locked.
3523 *
3524 * Change (always decrease) p_szc field starting from the last constituent
3525 * page and ending with root constituent page so that root's pszc always shows
3526 * the area where hat_page_demote() may be active.
3527 *
3528 * This mechanism is only used for file system pages where it's not always
3529 * possible to get EXCL locks on all constituent pages to demote the size code
3530 * (as is done for anonymous or kernel large pages).
3531 */
|