Print this page
OS-3088 need a lighter-weight page invalidation mechanism for zone memcap
OS-881 To work around OS-580, add support to invalidate mappings from only a single process

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/vm/hat_i86.c
          +++ new/usr/src/uts/i86pc/vm/hat_i86.c
↓ open down ↓ 20 lines elided ↑ open up ↑
  21   21  /*
  22   22   * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   */
  24   24  /*
  25   25   * Copyright (c) 2010, Intel Corporation.
  26   26   * All rights reserved.
  27   27   */
  28   28  /*
  29   29   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30   30   * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
       31 + * Copyright 2014 Joyent, Inc.  All rights reserved.
  31   32   */
  32   33  
  33   34  /*
  34   35   * VM - Hardware Address Translation management for i386 and amd64
  35   36   *
  36   37   * Implementation of the interfaces described in <common/vm/hat.h>
  37   38   *
  38   39   * Nearly all the details of how the hardware is managed should not be
  39   40   * visible outside this layer except for misc. machine specific functions
  40   41   * that work in conjunction with this code.
↓ open down ↓ 3275 lines elided ↑ open up ↑
3316 3317   *      call.
3317 3318   */
uint_t
hat_page_getattr(struct page *pp, uint_t flag)
{
	/*
	 * Return the page attribute bits selected by flag, read via
	 * PP_GETRM() without any locking (see the caveat in the comment
	 * above about racing with an unlocked read).
	 */
	return (PP_GETRM(pp, flag));
}
3323 3324  
3324 3325  
3325 3326  /*
3326      - * common code used by hat_pageunload() and hment_steal()
     3327 + * common code used by hat_page_inval() and hment_steal()
3327 3328   */
3328 3329  hment_t *
3329 3330  hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
3330 3331  {
3331 3332          x86pte_t old_pte;
3332 3333          pfn_t pfn = pp->p_pagenum;
3333 3334          hment_t *hm;
3334 3335  
3335 3336          /*
3336 3337           * We need to acquire a hold on the htable in order to
↓ open down ↓ 35 lines elided ↑ open up ↑
3372 3373           * drop the mapping list lock so that we might free the
3373 3374           * hment and htable.
3374 3375           */
3375 3376          x86_hm_exit(pp);
3376 3377          htable_release(ht);
3377 3378          return (hm);
3378 3379  }
3379 3380  
3380 3381  extern int      vpm_enable;
/*
 * Unload translations to a page. If the page is a subpage of a large
 * page, the large page mappings are also removed.
 * If curhat is not NULL, then we only unload the translation
 * for the given process, otherwise all translations are unloaded.
 */
void
hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
{
	page_t		*cur_pp = pp;
	hment_t		*hm;
	hment_t		*prev;
	htable_t	*ht;
	uint_t		entry;
	level_t		level;
	ulong_t		cnt;	/* mapping-count bound for single-hat unload */

	XPV_DISALLOW_MIGRATE();

#if defined(__amd64)
	/*
	 * clear the vpm ref.
	 */
	if (vpm_enable) {
		pp->p_vpmref = 0;
	}
#endif
	/*
	 * The loop with next_size handles pages with multiple pagesize mappings
	 */
next_size:
	/*
	 * For a single-hat unload, snapshot the share count so the scan
	 * below has an upper bound on how many entries it may skip.
	 */
	if (curhat != NULL)
		cnt = hat_page_getshare(cur_pp);
	for (;;) {

		/*
		 * Get a mapping list entry
		 */
		x86_hm_enter(cur_pp);
		for (prev = NULL; ; prev = hm) {
			hm = hment_walk(cur_pp, &ht, &entry, prev);
			if (hm == NULL) {
				x86_hm_exit(cur_pp);

curproc_done:
				/*
				 * If not part of a larger page, we're done.
				 */
				if (cur_pp->p_szc <= pg_szcd) {
					XPV_ALLOW_MIGRATE();
					return;
				}

				/*
				 * Else check the next larger page size.
				 * hat_page_demote() may decrease p_szc
				 * but that's ok; we'll just take an extra
				 * trip, discover there are no larger mappings
				 * and return.
				 */
				++pg_szcd;
				cur_pp = PP_GROUPLEADER(cur_pp, pg_szcd);
				goto next_size;
			}

			/*
			 * If this mapping size matches, remove it.
			 */
			level = ht->ht_level;
			if (level == pg_szcd) {
				if (curhat == NULL || ht->ht_hat == curhat)
					break;
				/*
				 * We are unloading mappings for a specific
				 * hat only, and this entry belongs to a
				 * different one, so leave it in place.
				 * Also do a safety check (using the share
				 * count snapshot taken above) to ensure we
				 * don't get in an infinite loop.
				 */
				if (cnt-- == 0) {
					x86_hm_exit(cur_pp);
					goto curproc_done;
				}
			}
		}

		/*
		 * Remove the mapping list entry for this page.
		 * Note this does the x86_hm_exit() for us.
		 */
		hm = hati_page_unmap(cur_pp, ht, entry);
		if (hm != NULL)
			hment_free(hm);

		/*
		 * A given hat has at most one mapping of this size to the
		 * page, so after removing it re-run the larger-page check
		 * above. (Perform check above for being part of a larger
		 * page.)
		 */
		if (curhat != NULL)
			goto curproc_done;
	}
}
3469 3480  
     3481 +/*
     3482 + * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
     3483 + * we only unload the translation for the current process, otherwise all
     3484 + * translations are unloaded.
     3485 + */
     3486 +static int
     3487 +hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
     3488 +{
     3489 +        struct hat      *curhat = NULL;
     3490 +
     3491 +        /*
     3492 +         * prevent recursion due to kmem_free()
     3493 +         */
     3494 +        ++curthread->t_hatdepth;
     3495 +        ASSERT(curthread->t_hatdepth < 16);
     3496 +
     3497 +        if (unloadflag == HAT_CURPROC_PGUNLOAD)
     3498 +                curhat = curthread->t_procp->p_as->a_hat;
     3499 +
     3500 +        hat_page_inval(pp, pg_szcd, curhat);
     3501 +
     3502 +        ASSERT(curthread->t_hatdepth > 0);
     3503 +        --curthread->t_hatdepth;
     3504 +        return (0);
     3505 +}
     3506 +
/*
 * Public entry point: unload translations to pp. With unloadflag set to
 * HAT_CURPROC_PGUNLOAD only the current process's translation is removed;
 * otherwise all translations are. pp must be locked EXCL. Always returns 0.
 */
int
hat_pageunload(struct page *pp, uint_t unloadflag)
{
	ASSERT(PAGE_EXCL(pp));
	return (hati_pageunload(pp, 0, unloadflag));
}
3476 3513  
3477 3514  /*
3478 3515   * Unload all large mappings to pp and reduce by 1 p_szc field of every large
3479 3516   * page level that included pp.
3480 3517   *
3481 3518   * pp must be locked EXCL. Even though no other constituent pages are locked
3482 3519   * it's legal to unload large mappings to pp because all constituent pages of
3483 3520   * large locked mappings have to be locked SHARED.  therefore if we have EXCL
3484 3521   * lock on one of constituent pages none of the large mappings to pp are
↓ open down ↓ 1008 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX