OS-3088 need a lighter-weight page invalidation mechanism for zone memcap
OS-881 To work around OS-580, add support to invalidate mappings from only a single process
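
These two changes introduce hat_page_inval(), a page invalidation entry
point that can restrict itself to a single process's hat, so the zone
memcap code can shed one process's translations without tearing down
every mapping of a page. A minimal sketch of the intended call pattern
through the public interface, assuming the kernel VM environment; only
hat_pageunload() and the HAT_CURPROC_PGUNLOAD flag come from this
change, and memcap_pageout_one() is a hypothetical caller:

        /*
         * Hypothetical memcap pageout step (sketch only).
         */
        static int
        memcap_pageout_one(page_t *pp)
        {
                ASSERT(PAGE_EXCL(pp));

                /*
                 * Drop only the current process's translations to pp;
                 * other processes' mappings of the page stay intact.
                 */
                return (hat_pageunload(pp, HAT_CURPROC_PGUNLOAD));
        }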
        
*** 26,35 ****
--- 26,36 ----
   * All rights reserved.
   */
  /*
   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+  * Copyright 2014 Joyent, Inc.  All rights reserved.
   */
  
  /*
   * VM - Hardware Address Translation management for i386 and amd64
   *
*** 3321,3331 ****
          return (PP_GETRM(pp, flag));
  }
  
  
  /*
!  * common code used by hat_pageunload() and hment_steal()
   */
  hment_t *
  hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
  {
          x86pte_t old_pte;
--- 3322,3332 ----
          return (PP_GETRM(pp, flag));
  }
  
  
  /*
!  * common code used by hat_page_inval() and hment_steal()
   */
  hment_t *
  hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
  {
          x86pte_t old_pte;
*** 3377,3411 ****
          return (hm);
  }
  
  extern int      vpm_enable;
  /*
!  * Unload all translations to a page. If the page is a subpage of a large
   * page, the large page mappings are also removed.
!  *
!  * The forceflags are unused.
   */
! 
! /*ARGSUSED*/
! static int
! hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
  {
          page_t          *cur_pp = pp;
          hment_t         *hm;
          hment_t         *prev;
          htable_t        *ht;
          uint_t          entry;
          level_t         level;
  
          XPV_DISALLOW_MIGRATE();
  
-         /*
-          * prevent recursion due to kmem_free()
-          */
-         ++curthread->t_hatdepth;
-         ASSERT(curthread->t_hatdepth < 16);
- 
  #if defined(__amd64)
          /*
           * clear the vpm ref.
           */
          if (vpm_enable) {
--- 3378,3405 ----
          return (hm);
  }
  
  extern int      vpm_enable;
  /*
!  * Unload translations to a page. If the page is a subpage of a large
   * page, the large page mappings are also removed.
!  * If curhat is not NULL, then we unload only the translation
!  * for the given process; otherwise all translations are unloaded.
   */
! void
! hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
  {
          page_t          *cur_pp = pp;
          hment_t         *hm;
          hment_t         *prev;
          htable_t        *ht;
          uint_t          entry;
          level_t         level;
+         ulong_t         cnt;
  
          XPV_DISALLOW_MIGRATE();
  
  #if defined(__amd64)
          /*
           * clear the vpm ref.
           */
          if (vpm_enable) {
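
The hunk above renames the core loop to hat_page_inval(), gives it a
curhat argument (NULL means unload every translation), and lifts the
t_hatdepth recursion guard out of the loop; the guard reappears in the
new hati_pageunload() wrapper in the last hunk, so direct callers of
hat_page_inval() skip that bookkeeping. A standalone sketch of the
guard pattern itself, with _Thread_local standing in for the per-thread
kthread_t field; none of these names are kernel interfaces:

        #include <assert.h>

        static _Thread_local int hat_depth;

        static void
        unload_page(void)
        {
                /* Guard against unbounded recursion via freeing paths. */
                ++hat_depth;
                assert(hat_depth < 16);

                /* ... unmap work that may re-enter unload_page() ... */

                assert(hat_depth > 0);
                --hat_depth;
        }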
*** 3414,3423 ****
--- 3408,3419 ----
  #endif
          /*
           * The loop with next_size handles pages with multiple pagesize mappings
           */
  next_size:
+         if (curhat != NULL)
+                 cnt = hat_page_getshare(cur_pp);
          for (;;) {
  
                  /*
                   * Get a mapping list entry
                   */
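
When curhat is given, the share count captured here bounds the mapping
list walk: the last hunk decrements it each time the walk skips another
hat's entry, so a list that changes underneath the walk cannot become
an infinite loop. A standalone sketch of that bounded-skip idiom; every
name is an illustrative placeholder, not a kernel API:

        #include <stddef.h>

        struct mapping {
                struct mapping  *next;
                void            *owner;         /* owning hat */
        };

        /* Return the first mapping owned by owner, scanning at most cnt. */
        static struct mapping *
        find_owned(struct mapping *list, void *owner, unsigned long cnt)
        {
                struct mapping *m;

                for (m = list; m != NULL; m = m->next) {
                        if (m->owner == owner)
                                return (m);
                        /*
                         * Another hat's entry: skip it, but give up once
                         * we have inspected as many entries as the share
                         * count reported.
                         */
                        if (cnt-- == 0)
                                break;
                }
                return (NULL);
        }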
*** 3425,3442 ****
                  for (prev = NULL; ; prev = hm) {
                          hm = hment_walk(cur_pp, &ht, &entry, prev);
                          if (hm == NULL) {
                                  x86_hm_exit(cur_pp);
  
                                  /*
                                   * If not part of a larger page, we're done.
                                   */
                                  if (cur_pp->p_szc <= pg_szcd) {
-                                         ASSERT(curthread->t_hatdepth > 0);
-                                         --curthread->t_hatdepth;
                                          XPV_ALLOW_MIGRATE();
!                                         return (0);
                                  }
  
                                  /*
                                   * Else check the next larger page size.
                                   * hat_page_demote() may decrease p_szc
--- 3421,3437 ----
                  for (prev = NULL; ; prev = hm) {
                          hm = hment_walk(cur_pp, &ht, &entry, prev);
                          if (hm == NULL) {
                                  x86_hm_exit(cur_pp);
  
+ curproc_done:
                                  /*
                                   * If not part of a larger page, we're done.
                                   */
                                  if (cur_pp->p_szc <= pg_szcd) {
                                          XPV_ALLOW_MIGRATE();
!                                         return;
                                  }
  
                                  /*
                                   * Else check the next larger page size.
                                   * hat_page_demote() may decrease p_szc
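
The new curproc_done: label lets the per-process path join the existing
"finished, or retry at the next larger page size" exit. A standalone
sketch of that control flow with placeholder names (nothing here is a
kernel interface):

        /* Unload mappings starting at size pg_szcd, walking up sizes. */
        static int
        invalidate(int mapped_szc, int pg_szcd)
        {
                int szc = pg_szcd;

        next_size:
                /* ... unload all mappings of this page at size szc ... */

                /* If the page is not part of a larger page, we're done. */
                if (mapped_szc <= szc)
                        return (0);

                /* Otherwise retry at the next larger page size. */
                szc++;
                goto next_size;
        }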
*** 3451,3479 ****
  
                          /*
                           * If this mapping size matches, remove it.
                           */
                          level = ht->ht_level;
!                         if (level == pg_szcd)
                                  break;
                  }
  
                  /*
                   * Remove the mapping list entry for this page.
                   * Note this does the x86_hm_exit() for us.
                   */
                  hm = hati_page_unmap(cur_pp, ht, entry);
                  if (hm != NULL)
                          hment_free(hm);
          }
  }
  
  int
! hat_pageunload(struct page *pp, uint_t forceflag)
  {
          ASSERT(PAGE_EXCL(pp));
!         return (hati_pageunload(pp, 0, forceflag));
  }
  
  /*
   * Unload all large mappings to pp and reduce by 1 p_szc field of every large
   * page level that included pp.
--- 3446,3516 ----
  
                          /*
                           * If this mapping size matches, remove it.
                           */
                          level = ht->ht_level;
!                         if (level == pg_szcd) {
!                                 if (curhat == NULL || ht->ht_hat == curhat)
                                          break;
+                                 /*
+                                  * We are unloading only the given hat's
+                                  * mappings and this entry belongs to a
+                                  * different hat, so leave it in place.
+                                  * Also bound the walk so a mapping list
+                                  * changing underneath us cannot become
+                                  * an infinite loop.
+                                  */
+                                 if (cnt-- == 0) {
+                                         x86_hm_exit(cur_pp);
+                                         goto curproc_done;
                                  }
+                         }
+                 }
  
                  /*
                   * Remove the mapping list entry for this page.
                   * Note this does the x86_hm_exit() for us.
                   */
                  hm = hati_page_unmap(cur_pp, ht, entry);
                  if (hm != NULL)
                          hment_free(hm);
+ 
+                 /* Jump to the larger-page check at curproc_done above. */
+                 if (curhat != NULL)
+                         goto curproc_done;
          }
  }
  
+ /*
+  * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
+  * we unload only the translation for the current process; otherwise all
+  * translations are unloaded.
+  */
+ static int
+ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
+ {
+         struct hat      *curhat = NULL;
+ 
+         /*
+          * prevent recursion due to kmem_free()
+          */
+         ++curthread->t_hatdepth;
+         ASSERT(curthread->t_hatdepth < 16);
+ 
+         if (unloadflag == HAT_CURPROC_PGUNLOAD)
+                 curhat = curthread->t_procp->p_as->a_hat;
+ 
+         hat_page_inval(pp, pg_szcd, curhat);
+ 
+         ASSERT(curthread->t_hatdepth > 0);
+         --curthread->t_hatdepth;
+         return (0);
+ }
+ 
  int
! hat_pageunload(struct page *pp, uint_t unloadflag)
  {
          ASSERT(PAGE_EXCL(pp));
!         return (hati_pageunload(pp, 0, unloadflag));
  }
  
  /*
   * Unload all large mappings to pp and reduce by 1 p_szc field of every large
   * page level that included pp.