Print this page
OS-3088 need a lighter-weight page invalidation mechanism for zone memcap
OS-881 To work around OS-580, add support to only invalidate mappings from a single process

@@ -26,10 +26,11 @@
  * All rights reserved.
  */
 /*
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright 2014 Joyent, Inc.  All rights reserved.
  */
 
 /*
  * VM - Hardware Address Translation management for i386 and amd64
  *

@@ -3321,11 +3322,11 @@
         return (PP_GETRM(pp, flag));
 }
 
 
 /*
- * common code used by hat_pageunload() and hment_steal()
+ * common code used by hat_page_inval() and hment_steal()
  */
 hment_t *
 hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
 {
         x86pte_t old_pte;

@@ -3377,35 +3378,28 @@
         return (hm);
 }
 
 extern int      vpm_enable;
 /*
- * Unload all translations to a page. If the page is a subpage of a large
+ * Unload translations to a page. If the page is a subpage of a large
  * page, the large page mappings are also removed.
- *
- * The forceflags are unused.
+ * If curhat is not NULL, then we only unload the translation
+ * for the given process, otherwise all translations are unloaded.
  */
-
-/*ARGSUSED*/
-static int
-hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
+void
+hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
 {
         page_t          *cur_pp = pp;
         hment_t         *hm;
         hment_t         *prev;
         htable_t        *ht;
         uint_t          entry;
         level_t         level;
+        ulong_t         cnt;
 
         XPV_DISALLOW_MIGRATE();
 
-        /*
-         * prevent recursion due to kmem_free()
-         */
-        ++curthread->t_hatdepth;
-        ASSERT(curthread->t_hatdepth < 16);
-
 #if defined(__amd64)
         /*
          * clear the vpm ref.
          */
         if (vpm_enable) {

@@ -3414,10 +3408,12 @@
 #endif
         /*
          * The loop with next_size handles pages with multiple pagesize mappings
          */
 next_size:
+        if (curhat != NULL)
+                cnt = hat_page_getshare(cur_pp);
         for (;;) {
 
                 /*
                  * Get a mapping list entry
                  */

@@ -3425,18 +3421,17 @@
                 for (prev = NULL; ; prev = hm) {
                         hm = hment_walk(cur_pp, &ht, &entry, prev);
                         if (hm == NULL) {
                                 x86_hm_exit(cur_pp);
 
+curproc_done:
                                 /*
                                  * If not part of a larger page, we're done.
                                  */
                                 if (cur_pp->p_szc <= pg_szcd) {
-                                        ASSERT(curthread->t_hatdepth > 0);
-                                        --curthread->t_hatdepth;
                                         XPV_ALLOW_MIGRATE();
-                                        return (0);
+                                        return;
                                 }
 
                                 /*
                                  * Else check the next larger page size.
                                  * hat_page_demote() may decrease p_szc

@@ -3451,29 +3446,71 @@
 
                         /*
                          * If this mapping size matches, remove it.
                          */
                         level = ht->ht_level;
-                        if (level == pg_szcd)
-                                break;
+                        if (level == pg_szcd) {
+                                if (curhat == NULL || ht->ht_hat == curhat)
+                                        break;
+                                /*
+                                 * Unloading only the given process but it's
+                                 * not the hat for the current process. Leave
+                                 * entry in place. Also do a safety check to
+                                 * ensure we don't get in an infinite loop
+                                 */
+                                if (cnt-- == 0) {
+                                        x86_hm_exit(cur_pp);
+                                        goto curproc_done;
+                                }
+                        }
                 }
 
                 /*
                  * Remove the mapping list entry for this page.
                  * Note this does the x86_hm_exit() for us.
                  */
                 hm = hati_page_unmap(cur_pp, ht, entry);
                 if (hm != NULL)
                         hment_free(hm);
+
+                /* Perform check above for being part of a larger page. */
+                if (curhat != NULL)
+                        goto curproc_done;
         }
 }
 
+/*
+ * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
+ * we only unload the translation for the current process, otherwise all
+ * translations are unloaded.
+ */
+static int
+hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
+{
+        struct hat      *curhat = NULL;
+
+        /*
+         * prevent recursion due to kmem_free()
+         */
+        ++curthread->t_hatdepth;
+        ASSERT(curthread->t_hatdepth < 16);
+
+        if (unloadflag == HAT_CURPROC_PGUNLOAD)
+                curhat = curthread->t_procp->p_as->a_hat;
+
+        hat_page_inval(pp, pg_szcd, curhat);
+
+        ASSERT(curthread->t_hatdepth > 0);
+        --curthread->t_hatdepth;
+        return (0);
+}
+
 int
-hat_pageunload(struct page *pp, uint_t forceflag)
+hat_pageunload(struct page *pp, uint_t unloadflag)
 {
         ASSERT(PAGE_EXCL(pp));
-        return (hati_pageunload(pp, 0, forceflag));
+        return (hati_pageunload(pp, 0, unloadflag));
 }
 
 /*
  * Unload all large mappings to pp and reduce by 1 p_szc field of every large
  * page level that included pp.