@@ -18,11 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -158,26 +157,14 @@
* wherever it expects to find known objects
* b) the client has removed the object from wherever it expects to
* find known objects and is about to free it, or
* c) the client has freed the object.
* In all these cases (a, b, and c) kmem frees the new object (the
- * unused copy destination). In the first case, the object is in
- * use and the correct action is that for LATER; in the latter two
- * cases, we know that the object is either freed or about to be
- * freed, in which case it is either already in a magazine or about
- * to be in one. In these cases, we know that the object will either
- * be reallocated and reused, or it will end up in a full magazine
- * that will be reaped (thereby liberating the slab). Because it
- * is prohibitively expensive to differentiate these cases, and
- * because the defrag code is executed when we're low on memory
- * (thereby biasing the system to reclaim full magazines) we treat
- * all DONT_KNOW cases as LATER and rely on cache reaping to
- * generally clean up full magazines. While we take the same action
- * for these cases, we maintain their semantic distinction: if
- * defragmentation is not occurring, it is useful to know if this
- * is due to objects in use (LATER) or objects in an unknown state
- * of transition (DONT_KNOW).
+ * unused copy destination) and searches for the old object in the
+ * magazine layer. If found, the object is removed from the magazine
+ * layer and freed to the slab layer so it will no longer hold the
+ * slab hostage.
*
* 2.3 Object States
*
* Neither kmem nor the client can be assumed to know the object's whereabouts
* at the time of the callback. An object belonging to a kmem cache may be in
@@ -296,14 +283,14 @@
* be reused on a subsequent allocation without the overhead of calling the
* constructor. While in the magazine it appears allocated from the point of
* view of the slab layer, making it a candidate for the move callback. Most
* objects unrecognized by the client in the move callback fall into this
* category and are cheaply distinguished from known objects by the test
- * described earlier. Because searching magazines is prohibitively expensive
- * for kmem, clients that do not mark freed objects (and therefore return
- * KMEM_CBRC_DONT_KNOW for large numbers of objects) may find defragmentation
- * efficacy reduced.
+ * described earlier. Since recognition is cheap for the client, and searching
+ * magazines is expensive for kmem, kmem defers searching until the client first
+ * returns KMEM_CBRC_DONT_KNOW. As long as the needed effort is reasonable, kmem
+ * elsewhere does what it can to avoid bothering the client unnecessarily.
*
* Invalidating the designated pointer member before freeing the object marks
* the object to be avoided in the callback, and conversely, assigning a valid
* value to the designated pointer member after allocating the object makes the
* object fair game for the callback:
@@ -1009,26 +996,17 @@
uint32_t kmem_mtbf = 0; /* mean time between failures [default: off] */
size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
size_t kmem_content_log_size; /* content log size [2% of memory] */
size_t kmem_failure_log_size; /* failure log [4 pages per CPU] */
size_t kmem_slab_log_size; /* slab create log [4 pages per CPU] */
-size_t kmem_zerosized_log_size; /* zero-sized log [4 pages per CPU] */
size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
size_t kmem_lite_minsize = 0; /* minimum buffer size for KMF_LITE */
size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
int kmem_lite_pcs = 4; /* number of PCs to store in KMF_LITE mode */
size_t kmem_maxverify; /* maximum bytes to inspect in debug routines */
size_t kmem_minfirewall; /* hardware-enforced redzone threshold */
-#ifdef DEBUG
-int kmem_warn_zerosized = 1; /* whether to warn on zero-sized KM_SLEEP */
-#else
-int kmem_warn_zerosized = 0; /* whether to warn on zero-sized KM_SLEEP */
-#endif
-
-int kmem_panic_zerosized = 0; /* whether to panic on zero-sized KM_SLEEP */
-
#ifdef _LP64
size_t kmem_max_cached = KMEM_BIG_MAXBUF; /* maximum kmem_alloc cache */
#else
size_t kmem_max_cached = KMEM_BIG_MAXBUF_32BIT; /* maximum kmem_alloc cache */
#endif
@@ -1058,12 +1036,26 @@
static vmem_t *kmem_va_arena;
static vmem_t *kmem_default_arena;
static vmem_t *kmem_firewall_va_arena;
static vmem_t *kmem_firewall_arena;
-static int kmem_zerosized; /* # of zero-sized allocs */
+/*
+ * Define KMEM_STATS to turn on statistic gathering. By default, it is only
+ * turned on when DEBUG is also defined.
+ */
+#ifdef DEBUG
+#define KMEM_STATS
+#endif /* DEBUG */
+#ifdef KMEM_STATS
+#define KMEM_STAT_ADD(stat) ((stat)++)
+#define KMEM_STAT_COND_ADD(cond, stat) ((void) (!(cond) || (stat)++))
+#else
+#define KMEM_STAT_ADD(stat) /* nothing */
+#define KMEM_STAT_COND_ADD(cond, stat) /* nothing */
+#endif /* KMEM_STATS */
+
/*
* kmem slab consolidator thresholds (tunables)
*/
size_t kmem_frag_minslabs = 101; /* minimum total slabs */
size_t kmem_frag_numer = 1; /* free buffers (numerator) */
@@ -1077,10 +1069,51 @@
* Number of slabs to scan backwards from the end of the partial slab list
* when searching for buffers to relocate.
*/
size_t kmem_reclaim_scan_range = 12;
+#ifdef KMEM_STATS
+static struct {
+ uint64_t kms_callbacks;
+ uint64_t kms_yes;
+ uint64_t kms_no;
+ uint64_t kms_later;
+ uint64_t kms_dont_need;
+ uint64_t kms_dont_know;
+ uint64_t kms_hunt_found_mag;
+ uint64_t kms_hunt_found_slab;
+ uint64_t kms_hunt_alloc_fail;
+ uint64_t kms_hunt_lucky;
+ uint64_t kms_notify;
+ uint64_t kms_notify_callbacks;
+ uint64_t kms_disbelief;
+ uint64_t kms_already_pending;
+ uint64_t kms_callback_alloc_fail;
+ uint64_t kms_callback_taskq_fail;
+ uint64_t kms_endscan_slab_dead;
+ uint64_t kms_endscan_slab_destroyed;
+ uint64_t kms_endscan_nomem;
+ uint64_t kms_endscan_refcnt_changed;
+ uint64_t kms_endscan_nomove_changed;
+ uint64_t kms_endscan_freelist;
+ uint64_t kms_avl_update;
+ uint64_t kms_avl_noupdate;
+ uint64_t kms_no_longer_reclaimable;
+ uint64_t kms_notify_no_longer_reclaimable;
+ uint64_t kms_notify_slab_dead;
+ uint64_t kms_notify_slab_destroyed;
+ uint64_t kms_alloc_fail;
+ uint64_t kms_constructor_fail;
+ uint64_t kms_dead_slabs_freed;
+ uint64_t kms_defrags;
+ uint64_t kms_scans;
+ uint64_t kms_scan_depot_ws_reaps;
+ uint64_t kms_debug_reaps;
+ uint64_t kms_debug_scans;
+} kmem_move_stats;
+#endif /* KMEM_STATS */
+
/* consolidator knobs */
static boolean_t kmem_move_noreap;
static boolean_t kmem_move_blocked;
static boolean_t kmem_move_fulltilt;
static boolean_t kmem_move_any_partial;
@@ -1107,11 +1140,10 @@
kmem_log_header_t *kmem_transaction_log;
kmem_log_header_t *kmem_content_log;
kmem_log_header_t *kmem_failure_log;
kmem_log_header_t *kmem_slab_log;
-kmem_log_header_t *kmem_zerosized_log;
static int kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
#define KMEM_BUFTAG_LITE_ENTER(bt, count, caller) \
if ((count) > 0) { \
@@ -1888,11 +1920,19 @@
ASSERT(sp->slab_chunks > 1);
list_remove(&cp->cache_complete_slabs, sp);
cp->cache_complete_slab_count--;
avl_add(&cp->cache_partial_slabs, sp);
} else {
+#ifdef DEBUG
+ if (avl_update_gt(&cp->cache_partial_slabs, sp)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_avl_update);
+ } else {
+ KMEM_STAT_ADD(kmem_move_stats.kms_avl_noupdate);
+ }
+#else
(void) avl_update_gt(&cp->cache_partial_slabs, sp);
+#endif
}
ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) ==
(cp->cache_complete_slab_count +
avl_numnodes(&cp->cache_partial_slabs) +
@@ -2922,38 +2962,13 @@
kmem_big_alloc_table_max) {
cp = kmem_big_alloc_table[index];
/* fall through to kmem_cache_alloc() */
} else {
- if (size == 0) {
- if (kmflag != KM_SLEEP && !(kmflag & KM_PANIC))
+ if (size == 0)
return (NULL);
- /*
- * If this is a sleeping allocation or one that has
- * been specified to panic on allocation failure, we
- * consider it to be deprecated behavior to allocate
- * 0 bytes. If we have been configured to panic under
- * this condition, we panic; if to warn, we warn -- and
-		 * regardless, we log to the kmem_zerosized_log that
-		 * this condition has occurred (which gives us
- * enough information to be able to debug it).
- */
- if (kmem_panic && kmem_panic_zerosized)
- panic("attempted to kmem_alloc() size of 0");
-
- if (kmem_warn_zerosized) {
- cmn_err(CE_WARN, "kmem_alloc(): sleeping "
- "allocation with size of 0; "
- "see kmem_zerosized_log for details");
- }
-
- kmem_log_event(kmem_zerosized_log, NULL, NULL, NULL);
-
- return (NULL);
- }
-
buf = vmem_alloc(kmem_oversize_arena, size,
kmflag & KM_VMFLAGS);
if (buf == NULL)
kmem_log_event(kmem_failure_log, NULL, NULL,
(void *)size);
@@ -3562,11 +3577,11 @@
kmcp->kmc_move_yes.value.ui64 = kd->kmd_yes;
kmcp->kmc_move_no.value.ui64 = kd->kmd_no;
kmcp->kmc_move_later.value.ui64 = kd->kmd_later;
kmcp->kmc_move_dont_need.value.ui64 = kd->kmd_dont_need;
kmcp->kmc_move_dont_know.value.ui64 = kd->kmd_dont_know;
- kmcp->kmc_move_hunt_found.value.ui64 = 0;
+ kmcp->kmc_move_hunt_found.value.ui64 = kd->kmd_hunt_found;
kmcp->kmc_move_slabs_freed.value.ui64 = kd->kmd_slabs_freed;
kmcp->kmc_defrag.value.ui64 = kd->kmd_defrags;
kmcp->kmc_scan.value.ui64 = kd->kmd_scans;
reclaimable = cp->cache_bufslab - (cp->cache_maxchunks - 1);
@@ -4472,12 +4487,12 @@
kmem_content_log_size = kmem_maxavail() / 50;
kmem_content_log = kmem_log_init(kmem_content_log_size);
}
kmem_failure_log = kmem_log_init(kmem_failure_log_size);
+
kmem_slab_log = kmem_log_init(kmem_slab_log_size);
- kmem_zerosized_log = kmem_log_init(kmem_zerosized_log_size);
/*
* Initialize STREAMS message caches so allocb() is available.
* This allows us to initialize the logging framework (cmn_err(9F),
* strlog(9F), etc) so we can start recording messages.
@@ -4661,11 +4676,99 @@
*/
return ((refcnt * KMEM_VOID_FRACTION) <
(sp->slab_chunks * cp->cache_defrag->kmd_reclaim_numer));
}
+static void *
+kmem_hunt_mag(kmem_cache_t *cp, kmem_magazine_t *m, int n, void *buf,
+ void *tbuf)
+{
+ int i; /* magazine round index */
+
+ for (i = 0; i < n; i++) {
+ if (buf == m->mag_round[i]) {
+ if (cp->cache_flags & KMF_BUFTAG) {
+ (void) kmem_cache_free_debug(cp, tbuf,
+ caller());
+ }
+ m->mag_round[i] = tbuf;
+ return (buf);
+ }
+ }
+
+ return (NULL);
+}
+
/*
+ * Hunt the magazine layer for the given buffer. If found, the buffer is
+ * removed from the magazine layer and returned, otherwise NULL is returned.
+ * The state of the returned buffer is freed and constructed.
+ */
+static void *
+kmem_hunt_mags(kmem_cache_t *cp, void *buf)
+{
+ kmem_cpu_cache_t *ccp;
+ kmem_magazine_t *m;
+ int cpu_seqid;
+ int n; /* magazine rounds */
+ void *tbuf; /* temporary swap buffer */
+
+ ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
+
+ /*
+	 * Allocate a buffer to swap with the one we hope to pull out of a
+ * magazine when found.
+ */
+ tbuf = kmem_cache_alloc(cp, KM_NOSLEEP);
+ if (tbuf == NULL) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_alloc_fail);
+ return (NULL);
+ }
+ if (tbuf == buf) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_lucky);
+ if (cp->cache_flags & KMF_BUFTAG) {
+ (void) kmem_cache_free_debug(cp, buf, caller());
+ }
+ return (buf);
+ }
+
+ /* Hunt the depot. */
+ mutex_enter(&cp->cache_depot_lock);
+ n = cp->cache_magtype->mt_magsize;
+ for (m = cp->cache_full.ml_list; m != NULL; m = m->mag_next) {
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&cp->cache_depot_lock);
+ return (buf);
+ }
+ }
+ mutex_exit(&cp->cache_depot_lock);
+
+ /* Hunt the per-CPU magazines. */
+ for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) {
+ ccp = &cp->cache_cpu[cpu_seqid];
+
+ mutex_enter(&ccp->cc_lock);
+ m = ccp->cc_loaded;
+ n = ccp->cc_rounds;
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&ccp->cc_lock);
+ return (buf);
+ }
+ m = ccp->cc_ploaded;
+ n = ccp->cc_prounds;
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&ccp->cc_lock);
+ return (buf);
+ }
+ mutex_exit(&ccp->cc_lock);
+ }
+
+ kmem_cache_free(cp, tbuf);
+ return (NULL);
+}
+
+/*
* May be called from the kmem_move_taskq, from kmem_cache_move_notify_task(),
* or when the buffer is freed.
*/
static void
kmem_slab_move_yes(kmem_cache_t *cp, kmem_slab_t *sp, void *from_buf)
@@ -4724,11 +4827,11 @@
*
* YES kmem frees the old buffer (the move was successful)
* NO kmem frees the new buffer, marks the slab of the old buffer
* non-reclaimable to avoid bothering the client again
* LATER kmem frees the new buffer, increments slab_later_count
- * DONT_KNOW kmem frees the new buffer
+ * DONT_KNOW kmem frees the new buffer, searches mags for the old buffer
* DONT_NEED kmem frees both the old buffer and the new buffer
*
* The pending callback argument now being processed contains both of the
* buffers (old and new) passed to the move callback function, the slab of the
* old buffer, and flags related to the move request, such as whether or not the
@@ -4758,25 +4861,31 @@
* last checked the slab's reclaimability (when the pending move was
* enqueued), or the client may have responded NO when asked to move
* another buffer on the same slab.
*/
if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_no_longer_reclaimable);
+ KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
+ kmem_move_stats.kms_notify_no_longer_reclaimable);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
/*
- * Checking the slab layer is easy, so we might as well do that here
- * in case we can avoid bothering the client.
+ * Hunting magazines is expensive, so we'll wait to do that until the
+ * client responds KMEM_CBRC_DONT_KNOW. However, checking the slab layer
+ * is cheap, so we might as well do that here in case we can avoid
+ * bothering the client.
*/
mutex_enter(&cp->cache_lock);
free_on_slab = (kmem_slab_allocated(cp, sp,
callback->kmm_from_buf) == NULL);
mutex_exit(&cp->cache_lock);
if (free_on_slab) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_slab);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
@@ -4784,22 +4893,27 @@
/*
* Make kmem_cache_alloc_debug() apply the constructor for us.
*/
if (kmem_cache_alloc_debug(cp, callback->kmm_to_buf,
KM_NOSLEEP, 1, caller()) != 0) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_alloc_fail);
kmem_move_end(cp, callback);
return;
}
} else if (cp->cache_constructor != NULL &&
cp->cache_constructor(callback->kmm_to_buf, cp->cache_private,
KM_NOSLEEP) != 0) {
atomic_inc_64(&cp->cache_alloc_fail);
+ KMEM_STAT_ADD(kmem_move_stats.kms_constructor_fail);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
+ KMEM_STAT_ADD(kmem_move_stats.kms_callbacks);
+ KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
+ kmem_move_stats.kms_notify_callbacks);
cp->cache_defrag->kmd_callbacks++;
cp->cache_defrag->kmd_thread = curthread;
cp->cache_defrag->kmd_from_buf = callback->kmm_from_buf;
cp->cache_defrag->kmd_to_buf = callback->kmm_to_buf;
DTRACE_PROBE2(kmem__move__start, kmem_cache_t *, cp, kmem_move_t *,
@@ -4813,10 +4927,11 @@
cp->cache_defrag->kmd_thread = NULL;
cp->cache_defrag->kmd_from_buf = NULL;
cp->cache_defrag->kmd_to_buf = NULL;
if (response == KMEM_CBRC_YES) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_yes);
cp->cache_defrag->kmd_yes++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
/* slab safe to access until kmem_move_end() */
if (sp->slab_refcnt == 0)
cp->cache_defrag->kmd_slabs_freed++;
@@ -4827,56 +4942,58 @@
return;
}
switch (response) {
case KMEM_CBRC_NO:
+ KMEM_STAT_ADD(kmem_move_stats.kms_no);
cp->cache_defrag->kmd_no++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_LATER:
+ KMEM_STAT_ADD(kmem_move_stats.kms_later);
cp->cache_defrag->kmd_later++;
mutex_enter(&cp->cache_lock);
if (!KMEM_SLAB_IS_PARTIAL(sp)) {
mutex_exit(&cp->cache_lock);
break;
}
if (++sp->slab_later_count >= KMEM_DISBELIEF) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_disbelief);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
} else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) {
sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp,
callback->kmm_from_buf);
}
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_NEED:
+ KMEM_STAT_ADD(kmem_move_stats.kms_dont_need);
cp->cache_defrag->kmd_dont_need++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
if (sp->slab_refcnt == 0)
cp->cache_defrag->kmd_slabs_freed++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_KNOW:
- /*
- * If we don't know if we can move this buffer or not, we'll
- * just assume that we can't: if the buffer is in fact free,
- * then it is sitting in one of the per-CPU magazines or in
- * a full magazine in the depot layer. Either way, because
- * defrag is induced in the same logic that reaps a cache,
- * it's likely that full magazines will be returned to the
- * system soon (thereby accomplishing what we're trying to
- * accomplish here: return those magazines to their slabs).
- * Given this, any work that we might do now to locate a buffer
- * in a magazine is wasted (and expensive!) work; we bump
- * a counter in this case and otherwise assume that we can't
- * move it.
- */
+ KMEM_STAT_ADD(kmem_move_stats.kms_dont_know);
cp->cache_defrag->kmd_dont_know++;
+ if (kmem_hunt_mags(cp, callback->kmm_from_buf) != NULL) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_mag);
+ cp->cache_defrag->kmd_hunt_found++;
+ kmem_slab_free_constructed(cp, callback->kmm_from_buf,
+ B_TRUE);
+ if (sp->slab_refcnt == 0)
+ cp->cache_defrag->kmd_slabs_freed++;
+ mutex_enter(&cp->cache_lock);
+ kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
+ mutex_exit(&cp->cache_lock);
+ }
break;
default:
panic("'%s' (%p) unexpected move callback response %d\n",
cp->cache_name, (void *)cp, response);
}
@@ -4897,13 +5014,14 @@
ASSERT(taskq_member(kmem_taskq, curthread));
ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP);
-
- if (callback == NULL)
+ if (callback == NULL) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_callback_alloc_fail);
return (B_FALSE);
+ }
callback->kmm_from_slab = sp;
callback->kmm_from_buf = buf;
callback->kmm_flags = flags;
@@ -4924,10 +5042,11 @@
*/
if (flags & KMM_DESPERATE) {
pending->kmm_flags |= KMM_DESPERATE;
}
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.kms_already_pending);
kmem_cache_free(kmem_move_cache, callback);
return (B_TRUE);
}
to_buf = kmem_slab_alloc_impl(cp, avl_first(&cp->cache_partial_slabs),
@@ -4937,10 +5056,11 @@
mutex_exit(&cp->cache_lock);
if (!taskq_dispatch(kmem_move_taskq, (task_func_t *)kmem_move_buffer,
callback, TQ_NOSLEEP)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_callback_taskq_fail);
mutex_enter(&cp->cache_lock);
avl_remove(&cp->cache_defrag->kmd_moves_pending, callback);
mutex_exit(&cp->cache_lock);
kmem_slab_free(cp, to_buf);
kmem_cache_free(kmem_move_cache, callback);
@@ -4982,10 +5102,11 @@
}
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
mutex_enter(&cp->cache_lock);
}
}
mutex_exit(&cp->cache_lock);
kmem_cache_free(kmem_move_cache, callback);
@@ -5126,10 +5247,12 @@
* destroyed along with any other slabs
* on the deadlist after the last
* pending move completes.
*/
list_insert_head(deadlist, sp);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_endscan_slab_dead);
return (-1);
}
/*
* Destroy the slab now if it was completely
@@ -5140,10 +5263,14 @@
*/
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_dead_slabs_freed);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_endscan_slab_destroyed);
mutex_enter(&cp->cache_lock);
/*
* Since we can't pick up the scan where we left
* off, abort the scan and say nothing about the
* number of reclaimable slabs.
@@ -5155,10 +5282,12 @@
/*
* Abort the scan if there is not enough memory
* for the request and say nothing about the
* number of reclaimable slabs.
*/
+ KMEM_STAT_COND_ADD(s < max_slabs,
+ kmem_move_stats.kms_endscan_nomem);
return (-1);
}
/*
* The slab's position changed while the lock was
@@ -5170,14 +5299,20 @@
* If this is a KMM_DEBUG move, the slab_refcnt
* may have changed because we allocated a
* destination buffer on the same slab. In that
* case, we're not interested in counting it.
*/
+ KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
+ (s < max_slabs),
+ kmem_move_stats.kms_endscan_refcnt_changed);
return (-1);
}
- if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove)
+ if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove) {
+ KMEM_STAT_COND_ADD(s < max_slabs,
+ kmem_move_stats.kms_endscan_nomove_changed);
return (-1);
+ }
/*
* Generating a move request allocates a destination
* buffer from the slab layer, bumping the first partial
* slab if it is completely allocated. If the current
@@ -5200,10 +5335,15 @@
}
}
}
end_scan:
+ KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
+ (s < max_slabs) &&
+ (sp == avl_first(&cp->cache_partial_slabs)),
+ kmem_move_stats.kms_endscan_freelist);
+
return (s);
}
typedef struct kmem_move_notify_args {
kmem_cache_t *kmna_cache;
@@ -5259,17 +5399,22 @@
if (!avl_is_empty(
&cp->cache_defrag->kmd_moves_pending)) {
list_insert_head(deadlist, sp);
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_notify_slab_dead);
return;
}
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_notify_slab_destroyed);
return;
}
} else {
kmem_slab_move_yes(cp, sp, buf);
}
@@ -5279,10 +5424,11 @@
void
kmem_cache_move_notify(kmem_cache_t *cp, void *buf)
{
kmem_move_notify_args_t *args;
+ KMEM_STAT_ADD(kmem_move_stats.kms_notify);
args = kmem_alloc(sizeof (kmem_move_notify_args_t), KM_NOSLEEP);
if (args != NULL) {
args->kmna_cache = cp;
args->kmna_buf = buf;
if (!taskq_dispatch(kmem_taskq,
@@ -5301,10 +5447,11 @@
mutex_enter(&cp->cache_lock);
n = avl_numnodes(&cp->cache_partial_slabs);
if (n > 1) {
/* kmem_move_buffers() drops and reacquires cache_lock */
+ KMEM_STAT_ADD(kmem_move_stats.kms_defrags);
cp->cache_defrag->kmd_defrags++;
(void) kmem_move_buffers(cp, n, 0, KMM_DESPERATE);
}
mutex_exit(&cp->cache_lock);
}
@@ -5399,10 +5546,11 @@
* the definition of a candidate slab if we're having trouble
* finding them.
*
* kmem_move_buffers() drops and reacquires cache_lock.
*/
+ KMEM_STAT_ADD(kmem_move_stats.kms_scans);
kmd->kmd_scans++;
slabs_found = kmem_move_buffers(cp, kmem_reclaim_scan_range,
kmem_reclaim_max_slabs, 0);
if (slabs_found >= 0) {
kmd->kmd_slabs_sought += kmem_reclaim_max_slabs;
@@ -5439,13 +5587,16 @@
(void) random_get_bytes((uint8_t *)&debug_rand, 2);
if (!kmem_move_noreap &&
((debug_rand % kmem_mtb_reap) == 0)) {
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.kms_debug_reaps);
kmem_cache_reap(cp);
return;
} else if ((debug_rand % kmem_mtb_move) == 0) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_scans);
+ KMEM_STAT_ADD(kmem_move_stats.kms_debug_scans);
kmd->kmd_scans++;
(void) kmem_move_buffers(cp,
kmem_reclaim_scan_range, 1, KMM_DEBUG);
}
}
@@ -5452,8 +5603,10 @@
#endif /* DEBUG */
}
mutex_exit(&cp->cache_lock);
- if (reap)
+ if (reap) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_scan_depot_ws_reaps);
kmem_depot_ws_reap(cp);
+ }
}