*** 18,28 ****
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
--- 18,27 ----
*** 158,183 ****
* wherever it expects to find known objects
* b) the client has removed the object from wherever it expects to
* find known objects and is about to free it, or
* c) the client has freed the object.
* In all these cases (a, b, and c) kmem frees the new object (the
! * unused copy destination). In the first case, the object is in
! * use and the correct action is that for LATER; in the latter two
! * cases, we know that the object is either freed or about to be
! * freed, in which case it is either already in a magazine or about
! * to be in one. In these cases, we know that the object will either
! * be reallocated and reused, or it will end up in a full magazine
! * that will be reaped (thereby liberating the slab). Because it
! * is prohibitively expensive to differentiate these cases, and
! * because the defrag code is executed when we're low on memory
! * (thereby biasing the system to reclaim full magazines) we treat
! * all DONT_KNOW cases as LATER and rely on cache reaping to
! * generally clean up full magazines. While we take the same action
! * for these cases, we maintain their semantic distinction: if
! * defragmentation is not occurring, it is useful to know if this
! * is due to objects in use (LATER) or objects in an unknown state
! * of transition (DONT_KNOW).
*
* 2.3 Object States
*
* Neither kmem nor the client can be assumed to know the object's whereabouts
* at the time of the callback. An object belonging to a kmem cache may be in
--- 157,170 ----
* wherever it expects to find known objects
* b) the client has removed the object from wherever it expects to
* find known objects and is about to free it, or
* c) the client has freed the object.
* In all these cases (a, b, and c) kmem frees the new object (the
! * unused copy destination) and searches for the old object in the
! * magazine layer. If found, the object is removed from the magazine
! * layer and freed to the slab layer so it will no longer hold the
! * slab hostage.
*
* 2.3 Object States
*
* Neither kmem nor the client can be assumed to know the object's whereabouts
* at the time of the callback. An object belonging to a kmem cache may be in
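For reference, a client-side move callback obeying the contract described in the hunk above might look like the following sketch. The object_t and container_t types, the o_container back-pointer, and the locking are hypothetical illustration only and are not part of this change; a real callback must also coordinate with its own users of the object.

	typedef struct container container_t;

	typedef struct object {
		container_t	*o_container;	/* NULL while the object is free */
		char		o_data[64];
	} object_t;

	struct container {
		kmutex_t	c_lock;
		object_t	*c_object;	/* hypothetical back-pointer */
	};

	/*ARGSUSED*/
	static kmem_cbrc_t
	object_move(void *buf, void *newbuf, size_t size, void *arg)
	{
		object_t *op = buf;
		object_t *np = newbuf;
		container_t *cp = op->o_container;

		/*
		 * A freed object (for example, one sitting in a magazine)
		 * has had o_container cleared, so the client cannot vouch
		 * for it.
		 */
		if (cp == NULL)
			return (KMEM_CBRC_DONT_KNOW);

		/* If the object is busy right now, ask kmem to retry later. */
		if (!mutex_tryenter(&cp->c_lock))
			return (KMEM_CBRC_LATER);

		if (op->o_container != cp) {
			/* The object was freed while we took the lock. */
			mutex_exit(&cp->c_lock);
			return (KMEM_CBRC_DONT_KNOW);
		}

		/*
		 * Known, movable object: copy it, repoint the container at
		 * the new copy, and let kmem free the old buffer.
		 */
		bcopy(op, np, sizeof (object_t));
		cp->c_object = np;
		mutex_exit(&cp->c_lock);
		return (KMEM_CBRC_YES);
	}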
*** 296,309 ****
* be reused on a subsequent allocation without the overhead of calling the
* constructor. While in the magazine it appears allocated from the point of
* view of the slab layer, making it a candidate for the move callback. Most
* objects unrecognized by the client in the move callback fall into this
* category and are cheaply distinguished from known objects by the test
! * described earlier. Because searching magazines is prohibitively expensive
! * for kmem, clients that do not mark freed objects (and therefore return
! * KMEM_CBRC_DONT_KNOW for large numbers of objects) may find defragmentation
! * efficacy reduced.
*
* Invalidating the designated pointer member before freeing the object marks
* the object to be avoided in the callback, and conversely, assigning a valid
* value to the designated pointer member after allocating the object makes the
* object fair game for the callback:
--- 283,296 ----
* be reused on a subsequent allocation without the overhead of calling the
* constructor. While in the magazine it appears allocated from the point of
* view of the slab layer, making it a candidate for the move callback. Most
* objects unrecognized by the client in the move callback fall into this
* category and are cheaply distinguished from known objects by the test
! * described earlier. Since recognition is cheap for the client, and searching
! * magazines is expensive for kmem, kmem defers searching until the client first
! * returns KMEM_CBRC_DONT_KNOW. As long as the needed effort is reasonable, kmem
! * elsewhere does what it can to avoid bothering the client unnecessarily.
*
* Invalidating the designated pointer member before freeing the object marks
* the object to be avoided in the callback, and conversely, assigning a valid
* value to the designated pointer member after allocating the object makes the
* object fair game for the callback:
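In practice the marking convention described above reduces to clearing and setting that pointer around free and allocation. A minimal sketch, reusing the hypothetical object_t/container_t names from the earlier callback sketch; object_cache is likewise assumed to have been created elsewhere with kmem_cache_create() and registered with kmem_cache_set_move():

	static kmem_cache_t *object_cache;	/* assumed created elsewhere */

	static object_t *
	object_alloc(container_t *container)
	{
		object_t *object = kmem_cache_alloc(object_cache, KM_SLEEP);

		/* Assigning the designated pointer makes the object "known". */
		object->o_container = container;
		return (object);
	}

	static void
	object_free(object_t *object)
	{
		/* Invalidate the designated pointer before freeing the object. */
		object->o_container = NULL;
		kmem_cache_free(object_cache, object);
	}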
*** 1009,1034 ****
uint32_t kmem_mtbf = 0; /* mean time between failures [default: off] */
size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
size_t kmem_content_log_size; /* content log size [2% of memory] */
size_t kmem_failure_log_size; /* failure log [4 pages per CPU] */
size_t kmem_slab_log_size; /* slab create log [4 pages per CPU] */
- size_t kmem_zerosized_log_size; /* zero-sized log [4 pages per CPU] */
size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
size_t kmem_lite_minsize = 0; /* minimum buffer size for KMF_LITE */
size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
int kmem_lite_pcs = 4; /* number of PCs to store in KMF_LITE mode */
size_t kmem_maxverify; /* maximum bytes to inspect in debug routines */
size_t kmem_minfirewall; /* hardware-enforced redzone threshold */
- #ifdef DEBUG
- int kmem_warn_zerosized = 1; /* whether to warn on zero-sized KM_SLEEP */
- #else
- int kmem_warn_zerosized = 0; /* whether to warn on zero-sized KM_SLEEP */
- #endif
-
- int kmem_panic_zerosized = 0; /* whether to panic on zero-sized KM_SLEEP */
-
#ifdef _LP64
size_t kmem_max_cached = KMEM_BIG_MAXBUF; /* maximum kmem_alloc cache */
#else
size_t kmem_max_cached = KMEM_BIG_MAXBUF_32BIT; /* maximum kmem_alloc cache */
#endif
--- 996,1012 ----
*** 1058,1069 ****
static vmem_t *kmem_va_arena;
static vmem_t *kmem_default_arena;
static vmem_t *kmem_firewall_va_arena;
static vmem_t *kmem_firewall_arena;
! static int kmem_zerosized; /* # of zero-sized allocs */
/*
* kmem slab consolidator thresholds (tunables)
*/
size_t kmem_frag_minslabs = 101; /* minimum total slabs */
size_t kmem_frag_numer = 1; /* free buffers (numerator) */
--- 1036,1061 ----
static vmem_t *kmem_va_arena;
static vmem_t *kmem_default_arena;
static vmem_t *kmem_firewall_va_arena;
static vmem_t *kmem_firewall_arena;
! /*
! * Define KMEM_STATS to turn on statistic gathering. By default, it is only
! * turned on when DEBUG is also defined.
! */
! #ifdef DEBUG
! #define KMEM_STATS
! #endif /* DEBUG */
+ #ifdef KMEM_STATS
+ #define KMEM_STAT_ADD(stat) ((stat)++)
+ #define KMEM_STAT_COND_ADD(cond, stat) ((void) (!(cond) || (stat)++))
+ #else
+ #define KMEM_STAT_ADD(stat) /* nothing */
+ #define KMEM_STAT_COND_ADD(cond, stat) /* nothing */
+ #endif /* KMEM_STATS */
+
/*
* kmem slab consolidator thresholds (tunables)
*/
size_t kmem_frag_minslabs = 101; /* minimum total slabs */
size_t kmem_frag_numer = 1; /* free buffers (numerator) */
*** 1077,1086 ****
--- 1069,1119 ----
* Number of slabs to scan backwards from the end of the partial slab list
* when searching for buffers to relocate.
*/
size_t kmem_reclaim_scan_range = 12;
+ #ifdef KMEM_STATS
+ static struct {
+ uint64_t kms_callbacks;
+ uint64_t kms_yes;
+ uint64_t kms_no;
+ uint64_t kms_later;
+ uint64_t kms_dont_need;
+ uint64_t kms_dont_know;
+ uint64_t kms_hunt_found_mag;
+ uint64_t kms_hunt_found_slab;
+ uint64_t kms_hunt_alloc_fail;
+ uint64_t kms_hunt_lucky;
+ uint64_t kms_notify;
+ uint64_t kms_notify_callbacks;
+ uint64_t kms_disbelief;
+ uint64_t kms_already_pending;
+ uint64_t kms_callback_alloc_fail;
+ uint64_t kms_callback_taskq_fail;
+ uint64_t kms_endscan_slab_dead;
+ uint64_t kms_endscan_slab_destroyed;
+ uint64_t kms_endscan_nomem;
+ uint64_t kms_endscan_refcnt_changed;
+ uint64_t kms_endscan_nomove_changed;
+ uint64_t kms_endscan_freelist;
+ uint64_t kms_avl_update;
+ uint64_t kms_avl_noupdate;
+ uint64_t kms_no_longer_reclaimable;
+ uint64_t kms_notify_no_longer_reclaimable;
+ uint64_t kms_notify_slab_dead;
+ uint64_t kms_notify_slab_destroyed;
+ uint64_t kms_alloc_fail;
+ uint64_t kms_constructor_fail;
+ uint64_t kms_dead_slabs_freed;
+ uint64_t kms_defrags;
+ uint64_t kms_scans;
+ uint64_t kms_scan_depot_ws_reaps;
+ uint64_t kms_debug_reaps;
+ uint64_t kms_debug_scans;
+ } kmem_move_stats;
+ #endif /* KMEM_STATS */
+
/* consolidator knobs */
static boolean_t kmem_move_noreap;
static boolean_t kmem_move_blocked;
static boolean_t kmem_move_fulltilt;
static boolean_t kmem_move_any_partial;
*** 1107,1117 ****
kmem_log_header_t *kmem_transaction_log;
kmem_log_header_t *kmem_content_log;
kmem_log_header_t *kmem_failure_log;
kmem_log_header_t *kmem_slab_log;
- kmem_log_header_t *kmem_zerosized_log;
static int kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
#define KMEM_BUFTAG_LITE_ENTER(bt, count, caller) \
if ((count) > 0) { \
--- 1140,1149 ----
*** 1888,1898 ****
--- 1920,1938 ----
ASSERT(sp->slab_chunks > 1);
list_remove(&cp->cache_complete_slabs, sp);
cp->cache_complete_slab_count--;
avl_add(&cp->cache_partial_slabs, sp);
} else {
+ #ifdef DEBUG
+ if (avl_update_gt(&cp->cache_partial_slabs, sp)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_avl_update);
+ } else {
+ KMEM_STAT_ADD(kmem_move_stats.kms_avl_noupdate);
+ }
+ #else
(void) avl_update_gt(&cp->cache_partial_slabs, sp);
+ #endif
}
ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) ==
(cp->cache_complete_slab_count +
avl_numnodes(&cp->cache_partial_slabs) +
*** 2922,2959 ****
kmem_big_alloc_table_max) {
cp = kmem_big_alloc_table[index];
/* fall through to kmem_cache_alloc() */
} else {
! if (size == 0) {
! if (kmflag != KM_SLEEP && !(kmflag & KM_PANIC))
return (NULL);
- /*
- * If this is a sleeping allocation or one that has
- * been specified to panic on allocation failure, we
- * consider it to be deprecated behavior to allocate
- * 0 bytes. If we have been configured to panic under
- * this condition, we panic; if to warn, we warn -- and
- * regardless, we log to the kmem_zerosized_log that
- * this condition has occurred (which gives us
- * enough information to be able to debug it).
- */
- if (kmem_panic && kmem_panic_zerosized)
- panic("attempted to kmem_alloc() size of 0");
-
- if (kmem_warn_zerosized) {
- cmn_err(CE_WARN, "kmem_alloc(): sleeping "
- "allocation with size of 0; "
- "see kmem_zerosized_log for details");
- }
-
- kmem_log_event(kmem_zerosized_log, NULL, NULL, NULL);
-
- return (NULL);
- }
-
buf = vmem_alloc(kmem_oversize_arena, size,
kmflag & KM_VMFLAGS);
if (buf == NULL)
kmem_log_event(kmem_failure_log, NULL, NULL,
(void *)size);
--- 2962,2974 ----
kmem_big_alloc_table_max) {
cp = kmem_big_alloc_table[index];
/* fall through to kmem_cache_alloc() */
} else {
! if (size == 0)
return (NULL);
buf = vmem_alloc(kmem_oversize_arena, size,
kmflag & KM_VMFLAGS);
if (buf == NULL)
kmem_log_event(kmem_failure_log, NULL, NULL,
(void *)size);
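The zero-sized handling on the left-hand side of this hunk exists because KM_SLEEP callers normally assume a non-NULL return, so a zero-sized sleeping allocation is almost always a latent bug. A hypothetical caller pattern that the warning is meant to flag (foo_t and foo_table_create() are illustrative names only):

	typedef struct foo {
		int	f_busy;
	} foo_t;

	static foo_t *
	foo_table_create(size_t nitems)
	{
		/*
		 * With nitems == 0, kmem_alloc() returns NULL even though
		 * KM_SLEEP allocations otherwise cannot fail; the store
		 * below then dereferences a NULL pointer.
		 */
		foo_t *tab = kmem_alloc(nitems * sizeof (foo_t), KM_SLEEP);

		tab[0].f_busy = 0;
		return (tab);
	}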
*** 3562,3572 ****
kmcp->kmc_move_yes.value.ui64 = kd->kmd_yes;
kmcp->kmc_move_no.value.ui64 = kd->kmd_no;
kmcp->kmc_move_later.value.ui64 = kd->kmd_later;
kmcp->kmc_move_dont_need.value.ui64 = kd->kmd_dont_need;
kmcp->kmc_move_dont_know.value.ui64 = kd->kmd_dont_know;
! kmcp->kmc_move_hunt_found.value.ui64 = 0;
kmcp->kmc_move_slabs_freed.value.ui64 = kd->kmd_slabs_freed;
kmcp->kmc_defrag.value.ui64 = kd->kmd_defrags;
kmcp->kmc_scan.value.ui64 = kd->kmd_scans;
reclaimable = cp->cache_bufslab - (cp->cache_maxchunks - 1);
--- 3577,3587 ----
kmcp->kmc_move_yes.value.ui64 = kd->kmd_yes;
kmcp->kmc_move_no.value.ui64 = kd->kmd_no;
kmcp->kmc_move_later.value.ui64 = kd->kmd_later;
kmcp->kmc_move_dont_need.value.ui64 = kd->kmd_dont_need;
kmcp->kmc_move_dont_know.value.ui64 = kd->kmd_dont_know;
! kmcp->kmc_move_hunt_found.value.ui64 = kd->kmd_hunt_found;
kmcp->kmc_move_slabs_freed.value.ui64 = kd->kmd_slabs_freed;
kmcp->kmc_defrag.value.ui64 = kd->kmd_defrags;
kmcp->kmc_scan.value.ui64 = kd->kmd_scans;
reclaimable = cp->cache_bufslab - (cp->cache_maxchunks - 1);
*** 4472,4483 ****
kmem_content_log_size = kmem_maxavail() / 50;
kmem_content_log = kmem_log_init(kmem_content_log_size);
}
kmem_failure_log = kmem_log_init(kmem_failure_log_size);
kmem_slab_log = kmem_log_init(kmem_slab_log_size);
- kmem_zerosized_log = kmem_log_init(kmem_zerosized_log_size);
/*
* Initialize STREAMS message caches so allocb() is available.
* This allows us to initialize the logging framework (cmn_err(9F),
* strlog(9F), etc) so we can start recording messages.
--- 4487,4498 ----
kmem_content_log_size = kmem_maxavail() / 50;
kmem_content_log = kmem_log_init(kmem_content_log_size);
}
kmem_failure_log = kmem_log_init(kmem_failure_log_size);
+
kmem_slab_log = kmem_log_init(kmem_slab_log_size);
/*
* Initialize STREAMS message caches so allocb() is available.
* This allows us to initialize the logging framework (cmn_err(9F),
* strlog(9F), etc) so we can start recording messages.
*** 4661,4671 ****
--- 4676,4774 ----
*/
return ((refcnt * KMEM_VOID_FRACTION) <
(sp->slab_chunks * cp->cache_defrag->kmd_reclaim_numer));
}
+ static void *
+ kmem_hunt_mag(kmem_cache_t *cp, kmem_magazine_t *m, int n, void *buf,
+ void *tbuf)
+ {
+ int i; /* magazine round index */
+
+ for (i = 0; i < n; i++) {
+ if (buf == m->mag_round[i]) {
+ if (cp->cache_flags & KMF_BUFTAG) {
+ (void) kmem_cache_free_debug(cp, tbuf,
+ caller());
+ }
+ m->mag_round[i] = tbuf;
+ return (buf);
+ }
+ }
+
+ return (NULL);
+ }
+
/*
+ * Hunt the magazine layer for the given buffer. If found, the buffer is
+ * removed from the magazine layer and returned, otherwise NULL is returned.
+ * The state of the returned buffer is freed and constructed.
+ */
+ static void *
+ kmem_hunt_mags(kmem_cache_t *cp, void *buf)
+ {
+ kmem_cpu_cache_t *ccp;
+ kmem_magazine_t *m;
+ int cpu_seqid;
+ int n; /* magazine rounds */
+ void *tbuf; /* temporary swap buffer */
+
+ ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
+
+ /*
+ * Allocate a buffer to swap with the one we hope to pull out of a
+ * magazine when found.
+ */
+ tbuf = kmem_cache_alloc(cp, KM_NOSLEEP);
+ if (tbuf == NULL) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_alloc_fail);
+ return (NULL);
+ }
+ if (tbuf == buf) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_lucky);
+ if (cp->cache_flags & KMF_BUFTAG) {
+ (void) kmem_cache_free_debug(cp, buf, caller());
+ }
+ return (buf);
+ }
+
+ /* Hunt the depot. */
+ mutex_enter(&cp->cache_depot_lock);
+ n = cp->cache_magtype->mt_magsize;
+ for (m = cp->cache_full.ml_list; m != NULL; m = m->mag_next) {
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&cp->cache_depot_lock);
+ return (buf);
+ }
+ }
+ mutex_exit(&cp->cache_depot_lock);
+
+ /* Hunt the per-CPU magazines. */
+ for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) {
+ ccp = &cp->cache_cpu[cpu_seqid];
+
+ mutex_enter(&ccp->cc_lock);
+ m = ccp->cc_loaded;
+ n = ccp->cc_rounds;
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&ccp->cc_lock);
+ return (buf);
+ }
+ m = ccp->cc_ploaded;
+ n = ccp->cc_prounds;
+ if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
+ mutex_exit(&ccp->cc_lock);
+ return (buf);
+ }
+ mutex_exit(&ccp->cc_lock);
+ }
+
+ kmem_cache_free(cp, tbuf);
+ return (NULL);
+ }
+
+ /*
* May be called from the kmem_move_taskq, from kmem_cache_move_notify_task(),
* or when the buffer is freed.
*/
static void
kmem_slab_move_yes(kmem_cache_t *cp, kmem_slab_t *sp, void *from_buf)
*** 4724,4734 ****
*
* YES kmem frees the old buffer (the move was successful)
* NO kmem frees the new buffer, marks the slab of the old buffer
* non-reclaimable to avoid bothering the client again
* LATER kmem frees the new buffer, increments slab_later_count
! * DONT_KNOW kmem frees the new buffer
* DONT_NEED kmem frees both the old buffer and the new buffer
*
* The pending callback argument now being processed contains both of the
* buffers (old and new) passed to the move callback function, the slab of the
* old buffer, and flags related to the move request, such as whether or not the
--- 4827,4837 ----
*
* YES kmem frees the old buffer (the move was successful)
* NO kmem frees the new buffer, marks the slab of the old buffer
* non-reclaimable to avoid bothering the client again
* LATER kmem frees the new buffer, increments slab_later_count
! * DONT_KNOW kmem frees the new buffer, searches mags for the old buffer
* DONT_NEED kmem frees both the old buffer and the new buffer
*
* The pending callback argument now being processed contains both of the
* buffers (old and new) passed to the move callback function, the slab of the
* old buffer, and flags related to the move request, such as whether or not the
*** 4758,4782 ****
* last checked the slab's reclaimability (when the pending move was
* enqueued), or the client may have responded NO when asked to move
* another buffer on the same slab.
*/
if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) {
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
/*
! * Checking the slab layer is easy, so we might as well do that here
! * in case we can avoid bothering the client.
*/
mutex_enter(&cp->cache_lock);
free_on_slab = (kmem_slab_allocated(cp, sp,
callback->kmm_from_buf) == NULL);
mutex_exit(&cp->cache_lock);
if (free_on_slab) {
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
--- 4861,4891 ----
* last checked the slab's reclaimability (when the pending move was
* enqueued), or the client may have responded NO when asked to move
* another buffer on the same slab.
*/
if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_no_longer_reclaimable);
+ KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
+ kmem_move_stats.kms_notify_no_longer_reclaimable);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
/*
! * Hunting magazines is expensive, so we'll wait to do that until the
! * client responds KMEM_CBRC_DONT_KNOW. However, checking the slab layer
! * is cheap, so we might as well do that here in case we can avoid
! * bothering the client.
*/
mutex_enter(&cp->cache_lock);
free_on_slab = (kmem_slab_allocated(cp, sp,
callback->kmm_from_buf) == NULL);
mutex_exit(&cp->cache_lock);
if (free_on_slab) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_slab);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
*** 4784,4805 ****
--- 4893,4919 ----
/*
* Make kmem_cache_alloc_debug() apply the constructor for us.
*/
if (kmem_cache_alloc_debug(cp, callback->kmm_to_buf,
KM_NOSLEEP, 1, caller()) != 0) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_alloc_fail);
kmem_move_end(cp, callback);
return;
}
} else if (cp->cache_constructor != NULL &&
cp->cache_constructor(callback->kmm_to_buf, cp->cache_private,
KM_NOSLEEP) != 0) {
atomic_inc_64(&cp->cache_alloc_fail);
+ KMEM_STAT_ADD(kmem_move_stats.kms_constructor_fail);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
+ KMEM_STAT_ADD(kmem_move_stats.kms_callbacks);
+ KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
+ kmem_move_stats.kms_notify_callbacks);
cp->cache_defrag->kmd_callbacks++;
cp->cache_defrag->kmd_thread = curthread;
cp->cache_defrag->kmd_from_buf = callback->kmm_from_buf;
cp->cache_defrag->kmd_to_buf = callback->kmm_to_buf;
DTRACE_PROBE2(kmem__move__start, kmem_cache_t *, cp, kmem_move_t *,
*** 4813,4822 ****
--- 4927,4937 ----
cp->cache_defrag->kmd_thread = NULL;
cp->cache_defrag->kmd_from_buf = NULL;
cp->cache_defrag->kmd_to_buf = NULL;
if (response == KMEM_CBRC_YES) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_yes);
cp->cache_defrag->kmd_yes++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
/* slab safe to access until kmem_move_end() */
if (sp->slab_refcnt == 0)
cp->cache_defrag->kmd_slabs_freed++;
*** 4827,4882 ****
return;
}
switch (response) {
case KMEM_CBRC_NO:
cp->cache_defrag->kmd_no++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_LATER:
cp->cache_defrag->kmd_later++;
mutex_enter(&cp->cache_lock);
if (!KMEM_SLAB_IS_PARTIAL(sp)) {
mutex_exit(&cp->cache_lock);
break;
}
if (++sp->slab_later_count >= KMEM_DISBELIEF) {
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
} else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) {
sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp,
callback->kmm_from_buf);
}
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_NEED:
cp->cache_defrag->kmd_dont_need++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
if (sp->slab_refcnt == 0)
cp->cache_defrag->kmd_slabs_freed++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_KNOW:
! /*
! * If we don't know if we can move this buffer or not, we'll
! * just assume that we can't: if the buffer is in fact free,
! * then it is sitting in one of the per-CPU magazines or in
! * a full magazine in the depot layer. Either way, because
! * defrag is induced in the same logic that reaps a cache,
! * it's likely that full magazines will be returned to the
! * system soon (thereby accomplishing what we're trying to
! * accomplish here: return those magazines to their slabs).
! * Given this, any work that we might do now to locate a buffer
! * in a magazine is wasted (and expensive!) work; we bump
! * a counter in this case and otherwise assume that we can't
! * move it.
! */
cp->cache_defrag->kmd_dont_know++;
break;
default:
panic("'%s' (%p) unexpected move callback response %d\n",
cp->cache_name, (void *)cp, response);
}
--- 4942,4999 ----
return;
}
switch (response) {
case KMEM_CBRC_NO:
+ KMEM_STAT_ADD(kmem_move_stats.kms_no);
cp->cache_defrag->kmd_no++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_LATER:
+ KMEM_STAT_ADD(kmem_move_stats.kms_later);
cp->cache_defrag->kmd_later++;
mutex_enter(&cp->cache_lock);
if (!KMEM_SLAB_IS_PARTIAL(sp)) {
mutex_exit(&cp->cache_lock);
break;
}
if (++sp->slab_later_count >= KMEM_DISBELIEF) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_disbelief);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
} else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) {
sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp,
callback->kmm_from_buf);
}
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_NEED:
+ KMEM_STAT_ADD(kmem_move_stats.kms_dont_need);
cp->cache_defrag->kmd_dont_need++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
if (sp->slab_refcnt == 0)
cp->cache_defrag->kmd_slabs_freed++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_KNOW:
! KMEM_STAT_ADD(kmem_move_stats.kms_dont_know);
cp->cache_defrag->kmd_dont_know++;
+ if (kmem_hunt_mags(cp, callback->kmm_from_buf) != NULL) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_mag);
+ cp->cache_defrag->kmd_hunt_found++;
+ kmem_slab_free_constructed(cp, callback->kmm_from_buf,
+ B_TRUE);
+ if (sp->slab_refcnt == 0)
+ cp->cache_defrag->kmd_slabs_freed++;
+ mutex_enter(&cp->cache_lock);
+ kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
+ mutex_exit(&cp->cache_lock);
+ }
break;
default:
panic("'%s' (%p) unexpected move callback response %d\n",
cp->cache_name, (void *)cp, response);
}
*** 4897,4909 ****
ASSERT(taskq_member(kmem_taskq, curthread));
ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP);
!
! if (callback == NULL)
return (B_FALSE);
callback->kmm_from_slab = sp;
callback->kmm_from_buf = buf;
callback->kmm_flags = flags;
--- 5014,5027 ----
ASSERT(taskq_member(kmem_taskq, curthread));
ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP);
! if (callback == NULL) {
! KMEM_STAT_ADD(kmem_move_stats.kms_callback_alloc_fail);
return (B_FALSE);
+ }
callback->kmm_from_slab = sp;
callback->kmm_from_buf = buf;
callback->kmm_flags = flags;
*** 4924,4933 ****
--- 5042,5052 ----
*/
if (flags & KMM_DESPERATE) {
pending->kmm_flags |= KMM_DESPERATE;
}
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.kms_already_pending);
kmem_cache_free(kmem_move_cache, callback);
return (B_TRUE);
}
to_buf = kmem_slab_alloc_impl(cp, avl_first(&cp->cache_partial_slabs),
*** 4937,4946 ****
--- 5056,5066 ----
mutex_exit(&cp->cache_lock);
if (!taskq_dispatch(kmem_move_taskq, (task_func_t *)kmem_move_buffer,
callback, TQ_NOSLEEP)) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_callback_taskq_fail);
mutex_enter(&cp->cache_lock);
avl_remove(&cp->cache_defrag->kmd_moves_pending, callback);
mutex_exit(&cp->cache_lock);
kmem_slab_free(cp, to_buf);
kmem_cache_free(kmem_move_cache, callback);
*** 4982,4991 ****
--- 5102,5112 ----
}
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
mutex_enter(&cp->cache_lock);
}
}
mutex_exit(&cp->cache_lock);
kmem_cache_free(kmem_move_cache, callback);
*** 5126,5135 ****
--- 5247,5258 ----
* destroyed along with any other slabs
* on the deadlist after the last
* pending move completes.
*/
list_insert_head(deadlist, sp);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_endscan_slab_dead);
return (-1);
}
/*
* Destroy the slab now if it was completely
*** 5140,5149 ****
--- 5263,5276 ----
*/
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_dead_slabs_freed);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_endscan_slab_destroyed);
mutex_enter(&cp->cache_lock);
/*
* Since we can't pick up the scan where we left
* off, abort the scan and say nothing about the
* number of reclaimable slabs.
*** 5155,5164 ****
--- 5282,5293 ----
/*
* Abort the scan if there is not enough memory
* for the request and say nothing about the
* number of reclaimable slabs.
*/
+ KMEM_STAT_COND_ADD(s < max_slabs,
+ kmem_move_stats.kms_endscan_nomem);
return (-1);
}
/*
* The slab's position changed while the lock was
*** 5170,5183 ****
* If this is a KMM_DEBUG move, the slab_refcnt
* may have changed because we allocated a
* destination buffer on the same slab. In that
* case, we're not interested in counting it.
*/
return (-1);
}
! if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove)
return (-1);
/*
* Generating a move request allocates a destination
* buffer from the slab layer, bumping the first partial
* slab if it is completely allocated. If the current
--- 5299,5318 ----
* If this is a KMM_DEBUG move, the slab_refcnt
* may have changed because we allocated a
* destination buffer on the same slab. In that
* case, we're not interested in counting it.
*/
+ KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
+ (s < max_slabs),
+ kmem_move_stats.kms_endscan_refcnt_changed);
return (-1);
}
! if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove) {
! KMEM_STAT_COND_ADD(s < max_slabs,
! kmem_move_stats.kms_endscan_nomove_changed);
return (-1);
+ }
/*
* Generating a move request allocates a destination
* buffer from the slab layer, bumping the first partial
* slab if it is completely allocated. If the current
*** 5200,5209 ****
--- 5335,5349 ----
}
}
}
end_scan:
+ KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
+ (s < max_slabs) &&
+ (sp == avl_first(&cp->cache_partial_slabs)),
+ kmem_move_stats.kms_endscan_freelist);
+
return (s);
}
typedef struct kmem_move_notify_args {
kmem_cache_t *kmna_cache;
*** 5259,5275 ****
--- 5399,5420 ----
if (!avl_is_empty(
&cp->cache_defrag->kmd_moves_pending)) {
list_insert_head(deadlist, sp);
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_notify_slab_dead);
return;
}
cp->cache_defrag->kmd_deadcount--;
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
+ KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
+ KMEM_STAT_ADD(kmem_move_stats.
+ kms_notify_slab_destroyed);
return;
}
} else {
kmem_slab_move_yes(cp, sp, buf);
}
*** 5279,5288 ****
--- 5424,5434 ----
void
kmem_cache_move_notify(kmem_cache_t *cp, void *buf)
{
kmem_move_notify_args_t *args;
+ KMEM_STAT_ADD(kmem_move_stats.kms_notify);
args = kmem_alloc(sizeof (kmem_move_notify_args_t), KM_NOSLEEP);
if (args != NULL) {
args->kmna_cache = cp;
args->kmna_buf = buf;
if (!taskq_dispatch(kmem_taskq,
*** 5301,5310 ****
--- 5447,5457 ----
mutex_enter(&cp->cache_lock);
n = avl_numnodes(&cp->cache_partial_slabs);
if (n > 1) {
/* kmem_move_buffers() drops and reacquires cache_lock */
+ KMEM_STAT_ADD(kmem_move_stats.kms_defrags);
cp->cache_defrag->kmd_defrags++;
(void) kmem_move_buffers(cp, n, 0, KMM_DESPERATE);
}
mutex_exit(&cp->cache_lock);
}
*** 5399,5408 ****
--- 5546,5556 ----
* the definition of a candidate slab if we're having trouble
* finding them.
*
* kmem_move_buffers() drops and reacquires cache_lock.
*/
+ KMEM_STAT_ADD(kmem_move_stats.kms_scans);
kmd->kmd_scans++;
slabs_found = kmem_move_buffers(cp, kmem_reclaim_scan_range,
kmem_reclaim_max_slabs, 0);
if (slabs_found >= 0) {
kmd->kmd_slabs_sought += kmem_reclaim_max_slabs;
*** 5439,5451 ****
--- 5587,5602 ----
(void) random_get_bytes((uint8_t *)&debug_rand, 2);
if (!kmem_move_noreap &&
((debug_rand % kmem_mtb_reap) == 0)) {
mutex_exit(&cp->cache_lock);
+ KMEM_STAT_ADD(kmem_move_stats.kms_debug_reaps);
kmem_cache_reap(cp);
return;
} else if ((debug_rand % kmem_mtb_move) == 0) {
+ KMEM_STAT_ADD(kmem_move_stats.kms_scans);
+ KMEM_STAT_ADD(kmem_move_stats.kms_debug_scans);
kmd->kmd_scans++;
(void) kmem_move_buffers(cp,
kmem_reclaim_scan_range, 1, KMM_DEBUG);
}
}
*** 5452,5459 ****
#endif /* DEBUG */
}
mutex_exit(&cp->cache_lock);
! if (reap)
kmem_depot_ws_reap(cp);
}
--- 5603,5612 ----
#endif /* DEBUG */
}
mutex_exit(&cp->cache_lock);
! if (reap) {
! KMEM_STAT_ADD(kmem_move_stats.kms_scan_depot_ws_reaps);
kmem_depot_ws_reap(cp);
+ }
}