          --- old/usr/src/uts/common/os/kmem.c
          +++ new/usr/src/uts/common/os/kmem.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  24   23   * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  25   24   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  26   25   */
  27   26  
  28   27  /*
  29   28   * Kernel memory allocator, as described in the following two papers and a
  30   29   * statement about the consolidator:
  31   30   *
  32   31   * Jeff Bonwick,
  33   32   * The Slab Allocator: An Object-Caching Kernel Memory Allocator.
↓ open down ↓ 119 lines elided ↑ open up ↑
 153  152   *            with the new object (the unused copy destination). This response
 154  153   *            is the client's opportunity to be a model citizen and give back as
 155  154   *            much as it can.
 156  155   * DONT_KNOW: The client does not know about the object because
 157  156   *            a) the client has just allocated the object and not yet put it
 158  157   *               wherever it expects to find known objects
 159  158   *            b) the client has removed the object from wherever it expects to
 160  159   *               find known objects and is about to free it, or
 161  160   *            c) the client has freed the object.
 162  161   *            In all these cases (a, b, and c) kmem frees the new object (the
 163      - *            unused copy destination).  In the first case, the object is in
 164      - *            use and the correct action is that for LATER; in the latter two
 165      - *            cases, we know that the object is either freed or about to be
 166      - *            freed, in which case it is either already in a magazine or about
 167      - *            to be in one.  In these cases, we know that the object will either
 168      - *            be reallocated and reused, or it will end up in a full magazine
 169      - *            that will be reaped (thereby liberating the slab).  Because it
 170      - *            is prohibitively expensive to differentiate these cases, and
 171      - *            because the defrag code is executed when we're low on memory
 172      - *            (thereby biasing the system to reclaim full magazines) we treat
 173      - *            all DONT_KNOW cases as LATER and rely on cache reaping to
 174      - *            generally clean up full magazines.  While we take the same action
 175      - *            for these cases, we maintain their semantic distinction:  if
 176      - *            defragmentation is not occurring, it is useful to know if this
 177      - *            is due to objects in use (LATER) or objects in an unknown state
 178      - *            of transition (DONT_KNOW).
      162 + *            unused copy destination) and searches for the old object in the
      163 + *            magazine layer. If found, the object is removed from the magazine
      164 + *            layer and freed to the slab layer so it will no longer hold the
      165 + *            slab hostage.
 179  166   *
 180  167   * 2.3 Object States
 181  168   *
 182  169   * Neither kmem nor the client can be assumed to know the object's whereabouts
 183  170   * at the time of the callback. An object belonging to a kmem cache may be in
 184  171   * any of the following states:
 185  172   *
 186  173   * 1. Uninitialized on the slab
 187  174   * 2. Allocated from the slab but not constructed (still uninitialized)
 188  175   * 3. Allocated from the slab, constructed, but not yet ready for business
↓ open down ↓ 102 lines elided ↑ open up ↑
 291  278   *      object->o_container = (void *)((uintptr_t)object->o_container | 0x1);
 292  279   *      list_remove(&container->c_objects, object);
 293  280   *      mutex_exit(&container->c_objects_lock);
 294  281   *
 295  282   * In the common case, the object is freed to the magazine layer, where it may
 296  283   * be reused on a subsequent allocation without the overhead of calling the
 297  284   * constructor. While in the magazine it appears allocated from the point of
 298  285   * view of the slab layer, making it a candidate for the move callback. Most
 299  286   * objects unrecognized by the client in the move callback fall into this
 300  287   * category and are cheaply distinguished from known objects by the test
 301      - * described earlier. Because searching magazines is prohibitively expensive
 302      - * for kmem, clients that do not mark freed objects (and therefore return
 303      - * KMEM_CBRC_DONT_KNOW for large numbers of objects) may find defragmentation
 304      - * efficacy reduced.
      288 + * described earlier. Since recognition is cheap for the client, and searching
      289 + * magazines is expensive for kmem, kmem defers searching until the client first
      290 + * returns KMEM_CBRC_DONT_KNOW. As long as the needed effort is reasonable, kmem
      291 + * elsewhere does what it can to avoid bothering the client unnecessarily.
 305  292   *
 306  293   * Invalidating the designated pointer member before freeing the object marks
 307  294   * the object to be avoided in the callback, and conversely, assigning a valid
 308  295   * value to the designated pointer member after allocating the object makes the
 309  296   * object fair game for the callback:
 310  297   *
 311  298   *      ... allocate object ...
 312  299   *      ... set any initial state not set by the constructor ...
 313  300   *
 314  301   *      mutex_enter(&container->c_objects_lock);
↓ open down ↓ 689 lines elided ↑ open up ↑
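For illustration, a minimal sketch of what a client's move callback might look like under the contract described in the comment above. The container_t and object_t types, the o_node linkage, and object_move() itself are hypothetical, loosely following the container example used throughout this comment; a real client's callback will differ in the details.

    typedef struct container {
            kmutex_t        c_objects_lock;         /* protects c_objects */
            list_t          c_objects;              /* known object_t's */
    } container_t;

    typedef struct object {
            container_t     *o_container;           /* low bit set when freed */
            list_node_t     o_node;                 /* linkage on c_objects */
            /* ... client data ... */
    } object_t;

    static kmem_cbrc_t
    object_move(void *old, void *new, size_t size, void *arg)
    {
            object_t *op = old, *np = new;
            container_t *container = op->o_container;

            /*
             * A set low bit marks an object that is freed or in an unknown
             * transitional state, so the client cannot vouch for it.
             */
            if (container == NULL || ((uintptr_t)container & 0x1))
                    return (KMEM_CBRC_DONT_KNOW);

            if (!mutex_tryenter(&container->c_objects_lock))
                    return (KMEM_CBRC_LATER);

            /* Recheck under the lock before committing to the move. */
            if (op->o_container != container) {
                    mutex_exit(&container->c_objects_lock);
                    return (KMEM_CBRC_DONT_KNOW);
            }

            /* Copy the object's state and swap it into the container's list. */
            bcopy(op, np, size);
            list_link_init(&np->o_node);
            list_insert_after(&container->c_objects, op, np);
            list_remove(&container->c_objects, op);

            mutex_exit(&container->c_objects_lock);
            return (KMEM_CBRC_YES);
    }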
1004  991  clock_t kmem_reap_interval;     /* cache reaping rate [15 * HZ ticks] */
1005  992  int kmem_depot_contention = 3;  /* max failed tryenters per real interval */
1006  993  pgcnt_t kmem_reapahead = 0;     /* start reaping N pages before pageout */
1007  994  int kmem_panic = 1;             /* whether to panic on error */
1008  995  int kmem_logging = 1;           /* kmem_log_enter() override */
1009  996  uint32_t kmem_mtbf = 0;         /* mean time between failures [default: off] */
1010  997  size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
1011  998  size_t kmem_content_log_size;   /* content log size [2% of memory] */
1012  999  size_t kmem_failure_log_size;   /* failure log [4 pages per CPU] */
1013 1000  size_t kmem_slab_log_size;      /* slab create log [4 pages per CPU] */
1014      -size_t kmem_zerosized_log_size; /* zero-sized log [4 pages per CPU] */
1015 1001  size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
1016 1002  size_t kmem_lite_minsize = 0;   /* minimum buffer size for KMF_LITE */
1017 1003  size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
1018 1004  int kmem_lite_pcs = 4;          /* number of PCs to store in KMF_LITE mode */
1019 1005  size_t kmem_maxverify;          /* maximum bytes to inspect in debug routines */
1020 1006  size_t kmem_minfirewall;        /* hardware-enforced redzone threshold */
1021 1007  
1022      -#ifdef DEBUG
1023      -int kmem_warn_zerosized = 1;    /* whether to warn on zero-sized KM_SLEEP */
1024      -#else
1025      -int kmem_warn_zerosized = 0;    /* whether to warn on zero-sized KM_SLEEP */
1026      -#endif
1027      -
1028      -int kmem_panic_zerosized = 0;   /* whether to panic on zero-sized KM_SLEEP */
1029      -
1030 1008  #ifdef _LP64
1031 1009  size_t  kmem_max_cached = KMEM_BIG_MAXBUF;      /* maximum kmem_alloc cache */
1032 1010  #else
1033 1011  size_t  kmem_max_cached = KMEM_BIG_MAXBUF_32BIT; /* maximum kmem_alloc cache */
1034 1012  #endif
1035 1013  
1036 1014  #ifdef DEBUG
1037 1015  int kmem_flags = KMF_AUDIT | KMF_DEADBEEF | KMF_REDZONE | KMF_CONTENTS;
1038 1016  #else
1039 1017  int kmem_flags = 0;
↓ open down ↓ 13 lines elided ↑ open up ↑
1053 1031  static vmem_t           *kmem_msb_arena;        /* arena for metadata caches */
1054 1032  static vmem_t           *kmem_cache_arena;
1055 1033  static vmem_t           *kmem_hash_arena;
1056 1034  static vmem_t           *kmem_log_arena;
1057 1035  static vmem_t           *kmem_oversize_arena;
1058 1036  static vmem_t           *kmem_va_arena;
1059 1037  static vmem_t           *kmem_default_arena;
1060 1038  static vmem_t           *kmem_firewall_va_arena;
1061 1039  static vmem_t           *kmem_firewall_arena;
1062 1040  
1063      -static int              kmem_zerosized;         /* # of zero-sized allocs */
     1041 +/*
      1042 + * Define KMEM_STATS to turn on statistics gathering. By default, it is only
     1043 + * turned on when DEBUG is also defined.
     1044 + */
     1045 +#ifdef  DEBUG
     1046 +#define KMEM_STATS
     1047 +#endif  /* DEBUG */
1064 1048  
     1049 +#ifdef  KMEM_STATS
     1050 +#define KMEM_STAT_ADD(stat)                     ((stat)++)
     1051 +#define KMEM_STAT_COND_ADD(cond, stat)          ((void) (!(cond) || (stat)++))
     1052 +#else
     1053 +#define KMEM_STAT_ADD(stat)                     /* nothing */
     1054 +#define KMEM_STAT_COND_ADD(cond, stat)          /* nothing */
     1055 +#endif  /* KMEM_STATS */
     1056 +
1065 1057  /*
1066 1058   * kmem slab consolidator thresholds (tunables)
1067 1059   */
1068 1060  size_t kmem_frag_minslabs = 101;        /* minimum total slabs */
1069 1061  size_t kmem_frag_numer = 1;             /* free buffers (numerator) */
1070 1062  size_t kmem_frag_denom = KMEM_VOID_FRACTION; /* buffers (denominator) */
1071 1063  /*
1072 1064   * Maximum number of slabs from which to move buffers during a single
1073 1065   * maintenance interval while the system is not low on memory.
1074 1066   */
1075 1067  size_t kmem_reclaim_max_slabs = 1;
1076 1068  /*
1077 1069   * Number of slabs to scan backwards from the end of the partial slab list
1078 1070   * when searching for buffers to relocate.
1079 1071   */
1080 1072  size_t kmem_reclaim_scan_range = 12;
1081 1073  
     1074 +#ifdef  KMEM_STATS
     1075 +static struct {
     1076 +        uint64_t kms_callbacks;
     1077 +        uint64_t kms_yes;
     1078 +        uint64_t kms_no;
     1079 +        uint64_t kms_later;
     1080 +        uint64_t kms_dont_need;
     1081 +        uint64_t kms_dont_know;
     1082 +        uint64_t kms_hunt_found_mag;
     1083 +        uint64_t kms_hunt_found_slab;
     1084 +        uint64_t kms_hunt_alloc_fail;
     1085 +        uint64_t kms_hunt_lucky;
     1086 +        uint64_t kms_notify;
     1087 +        uint64_t kms_notify_callbacks;
     1088 +        uint64_t kms_disbelief;
     1089 +        uint64_t kms_already_pending;
     1090 +        uint64_t kms_callback_alloc_fail;
     1091 +        uint64_t kms_callback_taskq_fail;
     1092 +        uint64_t kms_endscan_slab_dead;
     1093 +        uint64_t kms_endscan_slab_destroyed;
     1094 +        uint64_t kms_endscan_nomem;
     1095 +        uint64_t kms_endscan_refcnt_changed;
     1096 +        uint64_t kms_endscan_nomove_changed;
     1097 +        uint64_t kms_endscan_freelist;
     1098 +        uint64_t kms_avl_update;
     1099 +        uint64_t kms_avl_noupdate;
     1100 +        uint64_t kms_no_longer_reclaimable;
     1101 +        uint64_t kms_notify_no_longer_reclaimable;
     1102 +        uint64_t kms_notify_slab_dead;
     1103 +        uint64_t kms_notify_slab_destroyed;
     1104 +        uint64_t kms_alloc_fail;
     1105 +        uint64_t kms_constructor_fail;
     1106 +        uint64_t kms_dead_slabs_freed;
     1107 +        uint64_t kms_defrags;
     1108 +        uint64_t kms_scans;
     1109 +        uint64_t kms_scan_depot_ws_reaps;
     1110 +        uint64_t kms_debug_reaps;
     1111 +        uint64_t kms_debug_scans;
     1112 +} kmem_move_stats;
     1113 +#endif  /* KMEM_STATS */
     1114 +
1082 1115  /* consolidator knobs */
1083 1116  static boolean_t kmem_move_noreap;
1084 1117  static boolean_t kmem_move_blocked;
1085 1118  static boolean_t kmem_move_fulltilt;
1086 1119  static boolean_t kmem_move_any_partial;
1087 1120  
1088 1121  #ifdef  DEBUG
1089 1122  /*
1090 1123   * kmem consolidator debug tunables:
1091 1124   * Ensure code coverage by occasionally running the consolidator even when the
↓ open down ↓ 10 lines elided ↑ open up ↑
1102 1135  
1103 1136  static void kmem_cache_scan(kmem_cache_t *);
1104 1137  static void kmem_cache_defrag(kmem_cache_t *);
1105 1138  static void kmem_slab_prefill(kmem_cache_t *, kmem_slab_t *);
1106 1139  
1107 1140  
1108 1141  kmem_log_header_t       *kmem_transaction_log;
1109 1142  kmem_log_header_t       *kmem_content_log;
1110 1143  kmem_log_header_t       *kmem_failure_log;
1111 1144  kmem_log_header_t       *kmem_slab_log;
1112      -kmem_log_header_t       *kmem_zerosized_log;
1113 1145  
1114 1146  static int              kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
1115 1147  
1116 1148  #define KMEM_BUFTAG_LITE_ENTER(bt, count, caller)                       \
1117 1149          if ((count) > 0) {                                              \
1118 1150                  pc_t *_s = ((kmem_buftag_lite_t *)(bt))->bt_history;    \
1119 1151                  pc_t *_e;                                               \
1120 1152                  /* memmove() the old entries down one notch */          \
1121 1153                  for (_e = &_s[(count) - 1]; _e > _s; _e--)              \
1122 1154                          *_e = *(_e - 1);                                \
↓ open down ↓ 760 lines elided ↑ open up ↑
1883 1915          }
1884 1916  
1885 1917          if (bcp->bc_next == NULL) {
1886 1918                  /* Transition the slab from completely allocated to partial. */
1887 1919                  ASSERT(sp->slab_refcnt == (sp->slab_chunks - 1));
1888 1920                  ASSERT(sp->slab_chunks > 1);
1889 1921                  list_remove(&cp->cache_complete_slabs, sp);
1890 1922                  cp->cache_complete_slab_count--;
1891 1923                  avl_add(&cp->cache_partial_slabs, sp);
1892 1924          } else {
     1925 +#ifdef  DEBUG
     1926 +                if (avl_update_gt(&cp->cache_partial_slabs, sp)) {
     1927 +                        KMEM_STAT_ADD(kmem_move_stats.kms_avl_update);
     1928 +                } else {
     1929 +                        KMEM_STAT_ADD(kmem_move_stats.kms_avl_noupdate);
     1930 +                }
     1931 +#else
1893 1932                  (void) avl_update_gt(&cp->cache_partial_slabs, sp);
     1933 +#endif
1894 1934          }
1895 1935  
1896 1936          ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) ==
1897 1937              (cp->cache_complete_slab_count +
1898 1938              avl_numnodes(&cp->cache_partial_slabs) +
1899 1939              (cp->cache_defrag == NULL ? 0 : cp->cache_defrag->kmd_deadcount)));
1900 1940          mutex_exit(&cp->cache_lock);
1901 1941  }
1902 1942  
1903 1943  /*
↓ open down ↓ 1013 lines elided ↑ open up ↑
2917 2957          if ((index = ((size - 1) >> KMEM_ALIGN_SHIFT)) < KMEM_ALLOC_TABLE_MAX) {
2918 2958                  cp = kmem_alloc_table[index];
2919 2959                  /* fall through to kmem_cache_alloc() */
2920 2960  
2921 2961          } else if ((index = ((size - 1) >> KMEM_BIG_SHIFT)) <
2922 2962              kmem_big_alloc_table_max) {
2923 2963                  cp = kmem_big_alloc_table[index];
2924 2964                  /* fall through to kmem_cache_alloc() */
2925 2965  
2926 2966          } else {
2927      -                if (size == 0) {
2928      -                        if (kmflag != KM_SLEEP && !(kmflag & KM_PANIC))
2929      -                                return (NULL);
2930      -
2931      -                        /*
2932      -                         * If this is a sleeping allocation or one that has
2933      -                         * been specified to panic on allocation failure, we
2934      -                         * consider it to be deprecated behavior to allocate
2935      -                         * 0 bytes.  If we have been configured to panic under
2936      -                         * this condition, we panic; if to warn, we warn -- and
2937      -                         * regardless, we log to the kmem_zerosized_log that
2938      -                         * that this condition has occurred (which gives us
2939      -                         * enough information to be able to debug it).
2940      -                         */
2941      -                        if (kmem_panic && kmem_panic_zerosized)
2942      -                                panic("attempted to kmem_alloc() size of 0");
2943      -
2944      -                        if (kmem_warn_zerosized) {
2945      -                                cmn_err(CE_WARN, "kmem_alloc(): sleeping "
2946      -                                    "allocation with size of 0; "
2947      -                                    "see kmem_zerosized_log for details");
2948      -                        }
2949      -
2950      -                        kmem_log_event(kmem_zerosized_log, NULL, NULL, NULL);
2951      -
     2967 +                if (size == 0)
2952 2968                          return (NULL);
2953      -                }
2954 2969  
2955 2970                  buf = vmem_alloc(kmem_oversize_arena, size,
2956 2971                      kmflag & KM_VMFLAGS);
2957 2972                  if (buf == NULL)
2958 2973                          kmem_log_event(kmem_failure_log, NULL, NULL,
2959 2974                              (void *)size);
2960 2975                  else if (KMEM_DUMP(kmem_slab_cache)) {
2961 2976                          /* stats for dump intercept */
2962 2977                          kmem_dump_oversize_allocs++;
2963 2978                          if (size > kmem_dump_oversize_max)
↓ open down ↓ 593 lines elided ↑ open up ↑
3557 3572          } else {
3558 3573                  int64_t reclaimable;
3559 3574  
3560 3575                  kmem_defrag_t *kd = cp->cache_defrag;
3561 3576                  kmcp->kmc_move_callbacks.value.ui64     = kd->kmd_callbacks;
3562 3577                  kmcp->kmc_move_yes.value.ui64           = kd->kmd_yes;
3563 3578                  kmcp->kmc_move_no.value.ui64            = kd->kmd_no;
3564 3579                  kmcp->kmc_move_later.value.ui64         = kd->kmd_later;
3565 3580                  kmcp->kmc_move_dont_need.value.ui64     = kd->kmd_dont_need;
3566 3581                  kmcp->kmc_move_dont_know.value.ui64     = kd->kmd_dont_know;
3567      -                kmcp->kmc_move_hunt_found.value.ui64    = 0;
     3582 +                kmcp->kmc_move_hunt_found.value.ui64    = kd->kmd_hunt_found;
3568 3583                  kmcp->kmc_move_slabs_freed.value.ui64   = kd->kmd_slabs_freed;
3569 3584                  kmcp->kmc_defrag.value.ui64             = kd->kmd_defrags;
3570 3585                  kmcp->kmc_scan.value.ui64               = kd->kmd_scans;
3571 3586  
3572 3587                  reclaimable = cp->cache_bufslab - (cp->cache_maxchunks - 1);
3573 3588                  reclaimable = MAX(reclaimable, 0);
3574 3589                  reclaimable += ((uint64_t)reap * cp->cache_magtype->mt_magsize);
3575 3590                  kmcp->kmc_move_reclaimable.value.ui64   = reclaimable;
3576 3591          }
3577 3592  
↓ open down ↓ 889 lines elided ↑ open up ↑
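As a worked illustration of the kmc_move_reclaimable computation above (all numbers invented): with cache_bufslab = 40 free buffers in the slab layer, cache_maxchunks = 16 buffers per slab, a magazine size of 15 rounds, and reap = 2 (the magazine count computed earlier in this function, elided above):

        reclaimable = MAX(40 - (16 - 1), 0) + 2 * 15
                    = 25 + 30
                    = 55

so the kstat would report an estimate of 55 buffers that defragmentation plus a working-set reap could plausibly free up.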
4467 4482                  kmem_transaction_log = kmem_log_init(kmem_transaction_log_size);
4468 4483          }
4469 4484  
4470 4485          if (kmem_flags & (KMF_CONTENTS | KMF_RANDOMIZE)) {
4471 4486                  if (kmem_content_log_size == 0)
4472 4487                          kmem_content_log_size = kmem_maxavail() / 50;
4473 4488                  kmem_content_log = kmem_log_init(kmem_content_log_size);
4474 4489          }
4475 4490  
4476 4491          kmem_failure_log = kmem_log_init(kmem_failure_log_size);
     4492 +
4477 4493          kmem_slab_log = kmem_log_init(kmem_slab_log_size);
4478      -        kmem_zerosized_log = kmem_log_init(kmem_zerosized_log_size);
4479 4494  
4480 4495          /*
4481 4496           * Initialize STREAMS message caches so allocb() is available.
4482 4497           * This allows us to initialize the logging framework (cmn_err(9F),
4483 4498           * strlog(9F), etc) so we can start recording messages.
4484 4499           */
4485 4500          streams_msg_init();
4486 4501  
4487 4502          /*
4488 4503           * Initialize the ZSD framework in Zones so modules loaded henceforth
↓ open down ↓ 167 lines elided ↑ open up ↑
4656 4671           * reclaimed until the cache as a whole is no longer fragmented.
4657 4672           *
4658 4673           *      sp->slab_refcnt   kmd_reclaim_numer
4659 4674           *      --------------- < ------------------
4660 4675           *      sp->slab_chunks   KMEM_VOID_FRACTION
4661 4676           */
4662 4677          return ((refcnt * KMEM_VOID_FRACTION) <
4663 4678              (sp->slab_chunks * cp->cache_defrag->kmd_reclaim_numer));
4664 4679  }
4665 4680  
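A quick worked example of the cross-multiplied test above, assuming for illustration that KMEM_VOID_FRACTION is 8 and kmd_reclaim_numer is 1 (both values are assumptions, not taken from this diff): for a slab with slab_chunks = 16,

        refcnt = 1:  1 * 8 =  8 < 16 * 1 = 16  -> reclaimable
        refcnt = 2:  2 * 8 = 16 < 16 * 1 = 16  -> false, not reclaimable

Raising kmd_reclaim_numer, as the consolidator does when it has trouble finding candidate slabs, widens the definition of a reclaimable slab.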
     4681 +static void *
     4682 +kmem_hunt_mag(kmem_cache_t *cp, kmem_magazine_t *m, int n, void *buf,
     4683 +    void *tbuf)
     4684 +{
     4685 +        int i;          /* magazine round index */
     4686 +
     4687 +        for (i = 0; i < n; i++) {
     4688 +                if (buf == m->mag_round[i]) {
     4689 +                        if (cp->cache_flags & KMF_BUFTAG) {
     4690 +                                (void) kmem_cache_free_debug(cp, tbuf,
     4691 +                                    caller());
     4692 +                        }
     4693 +                        m->mag_round[i] = tbuf;
     4694 +                        return (buf);
     4695 +                }
     4696 +        }
     4697 +
     4698 +        return (NULL);
     4699 +}
     4700 +
4666 4701  /*
     4702 + * Hunt the magazine layer for the given buffer. If found, the buffer is
      4703 + * removed from the magazine layer and returned; otherwise NULL is returned.
      4704 + * The returned buffer is in the freed and constructed state.
     4705 + */
     4706 +static void *
     4707 +kmem_hunt_mags(kmem_cache_t *cp, void *buf)
     4708 +{
     4709 +        kmem_cpu_cache_t *ccp;
     4710 +        kmem_magazine_t *m;
     4711 +        int cpu_seqid;
     4712 +        int n;          /* magazine rounds */
     4713 +        void *tbuf;     /* temporary swap buffer */
     4714 +
     4715 +        ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
     4716 +
     4717 +        /*
      4718 +         * Allocate a buffer to swap with the one we hope to pull out of a
     4719 +         * magazine when found.
     4720 +         */
     4721 +        tbuf = kmem_cache_alloc(cp, KM_NOSLEEP);
     4722 +        if (tbuf == NULL) {
     4723 +                KMEM_STAT_ADD(kmem_move_stats.kms_hunt_alloc_fail);
     4724 +                return (NULL);
     4725 +        }
     4726 +        if (tbuf == buf) {
     4727 +                KMEM_STAT_ADD(kmem_move_stats.kms_hunt_lucky);
     4728 +                if (cp->cache_flags & KMF_BUFTAG) {
     4729 +                        (void) kmem_cache_free_debug(cp, buf, caller());
     4730 +                }
     4731 +                return (buf);
     4732 +        }
     4733 +
     4734 +        /* Hunt the depot. */
     4735 +        mutex_enter(&cp->cache_depot_lock);
     4736 +        n = cp->cache_magtype->mt_magsize;
     4737 +        for (m = cp->cache_full.ml_list; m != NULL; m = m->mag_next) {
     4738 +                if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
     4739 +                        mutex_exit(&cp->cache_depot_lock);
     4740 +                        return (buf);
     4741 +                }
     4742 +        }
     4743 +        mutex_exit(&cp->cache_depot_lock);
     4744 +
     4745 +        /* Hunt the per-CPU magazines. */
     4746 +        for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) {
     4747 +                ccp = &cp->cache_cpu[cpu_seqid];
     4748 +
     4749 +                mutex_enter(&ccp->cc_lock);
     4750 +                m = ccp->cc_loaded;
     4751 +                n = ccp->cc_rounds;
     4752 +                if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
     4753 +                        mutex_exit(&ccp->cc_lock);
     4754 +                        return (buf);
     4755 +                }
     4756 +                m = ccp->cc_ploaded;
     4757 +                n = ccp->cc_prounds;
     4758 +                if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
     4759 +                        mutex_exit(&ccp->cc_lock);
     4760 +                        return (buf);
     4761 +                }
     4762 +                mutex_exit(&ccp->cc_lock);
     4763 +        }
     4764 +
     4765 +        kmem_cache_free(cp, tbuf);
     4766 +        return (NULL);
     4767 +}
     4768 +
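To illustrate the swap that kmem_hunt_mag() performs on behalf of kmem_hunt_mags() above (round contents invented):

        mag_round[] = { A, B, buf, C }          before the hunt
        mag_round[] = { A, B, tbuf, C }         after buf is found

The magazine keeps the same number of constructed buffers, so no magazine or depot bookkeeping changes, while buf comes back to the caller in the freed, constructed state. If no magazine holds buf, the temporary tbuf is simply freed again.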
     4769 +/*
4667 4770   * May be called from the kmem_move_taskq, from kmem_cache_move_notify_task(),
4668 4771   * or when the buffer is freed.
4669 4772   */
4670 4773  static void
4671 4774  kmem_slab_move_yes(kmem_cache_t *cp, kmem_slab_t *sp, void *from_buf)
4672 4775  {
4673 4776          ASSERT(MUTEX_HELD(&cp->cache_lock));
4674 4777          ASSERT(KMEM_SLAB_MEMBER(sp, from_buf));
4675 4778  
4676 4779          if (!KMEM_SLAB_IS_PARTIAL(sp)) {
↓ open down ↓ 42 lines elided ↑ open up ↑
4719 4822   * guarantee the present whereabouts of the buffer to be moved, so it is up to
4720 4823   * the client to safely determine whether or not it is still using the buffer.
4721 4824   * The client must not free either of the buffers passed to the move callback,
4722 4825   * since kmem wants to free them directly to the slab layer. The client response
4723 4826   * tells kmem which of the two buffers to free:
4724 4827   *
4725 4828   * YES          kmem frees the old buffer (the move was successful)
4726 4829   * NO           kmem frees the new buffer, marks the slab of the old buffer
4727 4830   *              non-reclaimable to avoid bothering the client again
4728 4831   * LATER        kmem frees the new buffer, increments slab_later_count
4729      - * DONT_KNOW    kmem frees the new buffer
     4832 + * DONT_KNOW    kmem frees the new buffer, searches mags for the old buffer
4730 4833   * DONT_NEED    kmem frees both the old buffer and the new buffer
4731 4834   *
4732 4835   * The pending callback argument now being processed contains both of the
4733 4836   * buffers (old and new) passed to the move callback function, the slab of the
4734 4837   * old buffer, and flags related to the move request, such as whether or not the
4735 4838   * system was desperate for memory.
4736 4839   *
4737 4840   * Slabs are not freed while there is a pending callback, but instead are kept
4738 4841   * on a deadlist, which is drained after the last callback completes. This means
4739 4842   * that slabs are safe to access until kmem_move_end(), no matter how many of
↓ open down ↓ 13 lines elided ↑ open up ↑
4753 4856          ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
4754 4857          ASSERT(KMEM_SLAB_MEMBER(sp, callback->kmm_from_buf));
4755 4858  
4756 4859          /*
4757 4860           * The number of allocated buffers on the slab may have changed since we
4758 4861           * last checked the slab's reclaimability (when the pending move was
4759 4862           * enqueued), or the client may have responded NO when asked to move
4760 4863           * another buffer on the same slab.
4761 4864           */
4762 4865          if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) {
     4866 +                KMEM_STAT_ADD(kmem_move_stats.kms_no_longer_reclaimable);
     4867 +                KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
     4868 +                    kmem_move_stats.kms_notify_no_longer_reclaimable);
4763 4869                  kmem_slab_free(cp, callback->kmm_to_buf);
4764 4870                  kmem_move_end(cp, callback);
4765 4871                  return;
4766 4872          }
4767 4873  
4768 4874          /*
4769      -         * Checking the slab layer is easy, so we might as well do that here
4770      -         * in case we can avoid bothering the client.
     4875 +         * Hunting magazines is expensive, so we'll wait to do that until the
     4876 +         * client responds KMEM_CBRC_DONT_KNOW. However, checking the slab layer
     4877 +         * is cheap, so we might as well do that here in case we can avoid
     4878 +         * bothering the client.
4771 4879           */
4772 4880          mutex_enter(&cp->cache_lock);
4773 4881          free_on_slab = (kmem_slab_allocated(cp, sp,
4774 4882              callback->kmm_from_buf) == NULL);
4775 4883          mutex_exit(&cp->cache_lock);
4776 4884  
4777 4885          if (free_on_slab) {
     4886 +                KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_slab);
4778 4887                  kmem_slab_free(cp, callback->kmm_to_buf);
4779 4888                  kmem_move_end(cp, callback);
4780 4889                  return;
4781 4890          }
4782 4891  
4783 4892          if (cp->cache_flags & KMF_BUFTAG) {
4784 4893                  /*
4785 4894                   * Make kmem_cache_alloc_debug() apply the constructor for us.
4786 4895                   */
4787 4896                  if (kmem_cache_alloc_debug(cp, callback->kmm_to_buf,
4788 4897                      KM_NOSLEEP, 1, caller()) != 0) {
     4898 +                        KMEM_STAT_ADD(kmem_move_stats.kms_alloc_fail);
4789 4899                          kmem_move_end(cp, callback);
4790 4900                          return;
4791 4901                  }
4792 4902          } else if (cp->cache_constructor != NULL &&
4793 4903              cp->cache_constructor(callback->kmm_to_buf, cp->cache_private,
4794 4904              KM_NOSLEEP) != 0) {
4795 4905                  atomic_inc_64(&cp->cache_alloc_fail);
     4906 +                KMEM_STAT_ADD(kmem_move_stats.kms_constructor_fail);
4796 4907                  kmem_slab_free(cp, callback->kmm_to_buf);
4797 4908                  kmem_move_end(cp, callback);
4798 4909                  return;
4799 4910          }
4800 4911  
     4912 +        KMEM_STAT_ADD(kmem_move_stats.kms_callbacks);
     4913 +        KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
     4914 +            kmem_move_stats.kms_notify_callbacks);
4801 4915          cp->cache_defrag->kmd_callbacks++;
4802 4916          cp->cache_defrag->kmd_thread = curthread;
4803 4917          cp->cache_defrag->kmd_from_buf = callback->kmm_from_buf;
4804 4918          cp->cache_defrag->kmd_to_buf = callback->kmm_to_buf;
4805 4919          DTRACE_PROBE2(kmem__move__start, kmem_cache_t *, cp, kmem_move_t *,
4806 4920              callback);
4807 4921  
4808 4922          response = cp->cache_move(callback->kmm_from_buf,
4809 4923              callback->kmm_to_buf, cp->cache_bufsize, cp->cache_private);
4810 4924  
4811 4925          DTRACE_PROBE3(kmem__move__end, kmem_cache_t *, cp, kmem_move_t *,
4812 4926              callback, kmem_cbrc_t, response);
4813 4927          cp->cache_defrag->kmd_thread = NULL;
4814 4928          cp->cache_defrag->kmd_from_buf = NULL;
4815 4929          cp->cache_defrag->kmd_to_buf = NULL;
4816 4930  
4817 4931          if (response == KMEM_CBRC_YES) {
     4932 +                KMEM_STAT_ADD(kmem_move_stats.kms_yes);
4818 4933                  cp->cache_defrag->kmd_yes++;
4819 4934                  kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
4820 4935                  /* slab safe to access until kmem_move_end() */
4821 4936                  if (sp->slab_refcnt == 0)
4822 4937                          cp->cache_defrag->kmd_slabs_freed++;
4823 4938                  mutex_enter(&cp->cache_lock);
4824 4939                  kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
4825 4940                  mutex_exit(&cp->cache_lock);
4826 4941                  kmem_move_end(cp, callback);
4827 4942                  return;
4828 4943          }
4829 4944  
4830 4945          switch (response) {
4831 4946          case KMEM_CBRC_NO:
     4947 +                KMEM_STAT_ADD(kmem_move_stats.kms_no);
4832 4948                  cp->cache_defrag->kmd_no++;
4833 4949                  mutex_enter(&cp->cache_lock);
4834 4950                  kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
4835 4951                  mutex_exit(&cp->cache_lock);
4836 4952                  break;
4837 4953          case KMEM_CBRC_LATER:
     4954 +                KMEM_STAT_ADD(kmem_move_stats.kms_later);
4838 4955                  cp->cache_defrag->kmd_later++;
4839 4956                  mutex_enter(&cp->cache_lock);
4840 4957                  if (!KMEM_SLAB_IS_PARTIAL(sp)) {
4841 4958                          mutex_exit(&cp->cache_lock);
4842 4959                          break;
4843 4960                  }
4844 4961  
4845 4962                  if (++sp->slab_later_count >= KMEM_DISBELIEF) {
     4963 +                        KMEM_STAT_ADD(kmem_move_stats.kms_disbelief);
4846 4964                          kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
4847 4965                  } else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) {
4848 4966                          sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp,
4849 4967                              callback->kmm_from_buf);
4850 4968                  }
4851 4969                  mutex_exit(&cp->cache_lock);
4852 4970                  break;
4853 4971          case KMEM_CBRC_DONT_NEED:
     4972 +                KMEM_STAT_ADD(kmem_move_stats.kms_dont_need);
4854 4973                  cp->cache_defrag->kmd_dont_need++;
4855 4974                  kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
4856 4975                  if (sp->slab_refcnt == 0)
4857 4976                          cp->cache_defrag->kmd_slabs_freed++;
4858 4977                  mutex_enter(&cp->cache_lock);
4859 4978                  kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
4860 4979                  mutex_exit(&cp->cache_lock);
4861 4980                  break;
4862 4981          case KMEM_CBRC_DONT_KNOW:
4863      -                /*
4864      -                 * If we don't know if we can move this buffer or not, we'll
4865      -                 * just assume that we can't:  if the buffer is in fact free,
4866      -                 * then it is sitting in one of the per-CPU magazines or in
4867      -                 * a full magazine in the depot layer.  Either way, because
4868      -                 * defrag is induced in the same logic that reaps a cache,
4869      -                 * it's likely that full magazines will be returned to the
4870      -                 * system soon (thereby accomplishing what we're trying to
4871      -                 * accomplish here: return those magazines to their slabs).
4872      -                 * Given this, any work that we might do now to locate a buffer
4873      -                 * in a magazine is wasted (and expensive!) work; we bump
4874      -                 * a counter in this case and otherwise assume that we can't
4875      -                 * move it.
4876      -                 */
     4982 +                KMEM_STAT_ADD(kmem_move_stats.kms_dont_know);
4877 4983                  cp->cache_defrag->kmd_dont_know++;
     4984 +                if (kmem_hunt_mags(cp, callback->kmm_from_buf) != NULL) {
     4985 +                        KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_mag);
     4986 +                        cp->cache_defrag->kmd_hunt_found++;
     4987 +                        kmem_slab_free_constructed(cp, callback->kmm_from_buf,
     4988 +                            B_TRUE);
     4989 +                        if (sp->slab_refcnt == 0)
     4990 +                                cp->cache_defrag->kmd_slabs_freed++;
     4991 +                        mutex_enter(&cp->cache_lock);
     4992 +                        kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
     4993 +                        mutex_exit(&cp->cache_lock);
     4994 +                }
4878 4995                  break;
4879 4996          default:
4880 4997                  panic("'%s' (%p) unexpected move callback response %d\n",
4881 4998                      cp->cache_name, (void *)cp, response);
4882 4999          }
4883 5000  
4884 5001          kmem_slab_free_constructed(cp, callback->kmm_to_buf, B_FALSE);
4885 5002          kmem_move_end(cp, callback);
4886 5003  }
4887 5004  
↓ open down ↓ 4 lines elided ↑ open up ↑
4892 5009          void *to_buf;
4893 5010          avl_index_t index;
4894 5011          kmem_move_t *callback, *pending;
4895 5012          ulong_t n;
4896 5013  
4897 5014          ASSERT(taskq_member(kmem_taskq, curthread));
4898 5015          ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
4899 5016          ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
4900 5017  
4901 5018          callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP);
4902      -
4903      -        if (callback == NULL)
     5019 +        if (callback == NULL) {
     5020 +                KMEM_STAT_ADD(kmem_move_stats.kms_callback_alloc_fail);
4904 5021                  return (B_FALSE);
     5022 +        }
4905 5023  
4906 5024          callback->kmm_from_slab = sp;
4907 5025          callback->kmm_from_buf = buf;
4908 5026          callback->kmm_flags = flags;
4909 5027  
4910 5028          mutex_enter(&cp->cache_lock);
4911 5029  
4912 5030          n = avl_numnodes(&cp->cache_partial_slabs);
4913 5031          if ((n == 0) || ((n == 1) && !(flags & KMM_DEBUG))) {
4914 5032                  mutex_exit(&cp->cache_lock);
↓ open down ↓ 4 lines elided ↑ open up ↑
4919 5037          pending = avl_find(&cp->cache_defrag->kmd_moves_pending, buf, &index);
4920 5038          if (pending != NULL) {
4921 5039                  /*
4922 5040                   * If the move is already pending and we're desperate now,
4923 5041                   * update the move flags.
4924 5042                   */
4925 5043                  if (flags & KMM_DESPERATE) {
4926 5044                          pending->kmm_flags |= KMM_DESPERATE;
4927 5045                  }
4928 5046                  mutex_exit(&cp->cache_lock);
     5047 +                KMEM_STAT_ADD(kmem_move_stats.kms_already_pending);
4929 5048                  kmem_cache_free(kmem_move_cache, callback);
4930 5049                  return (B_TRUE);
4931 5050          }
4932 5051  
4933 5052          to_buf = kmem_slab_alloc_impl(cp, avl_first(&cp->cache_partial_slabs),
4934 5053              B_FALSE);
4935 5054          callback->kmm_to_buf = to_buf;
4936 5055          avl_insert(&cp->cache_defrag->kmd_moves_pending, callback, index);
4937 5056  
4938 5057          mutex_exit(&cp->cache_lock);
4939 5058  
4940 5059          if (!taskq_dispatch(kmem_move_taskq, (task_func_t *)kmem_move_buffer,
4941 5060              callback, TQ_NOSLEEP)) {
     5061 +                KMEM_STAT_ADD(kmem_move_stats.kms_callback_taskq_fail);
4942 5062                  mutex_enter(&cp->cache_lock);
4943 5063                  avl_remove(&cp->cache_defrag->kmd_moves_pending, callback);
4944 5064                  mutex_exit(&cp->cache_lock);
4945 5065                  kmem_slab_free(cp, to_buf);
4946 5066                  kmem_cache_free(kmem_move_cache, callback);
4947 5067                  return (B_FALSE);
4948 5068          }
4949 5069  
4950 5070          return (B_TRUE);
4951 5071  }
↓ open down ↓ 25 lines elided ↑ open up ↑
4977 5097                   */
4978 5098                  while ((sp = list_remove_head(deadlist)) != NULL) {
4979 5099                          if (sp->slab_flags & KMEM_SLAB_MOVE_PENDING) {
4980 5100                                  list_insert_tail(deadlist, sp);
4981 5101                                  break;
4982 5102                          }
4983 5103                          cp->cache_defrag->kmd_deadcount--;
4984 5104                          cp->cache_slab_destroy++;
4985 5105                          mutex_exit(&cp->cache_lock);
4986 5106                          kmem_slab_destroy(cp, sp);
     5107 +                        KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
4987 5108                          mutex_enter(&cp->cache_lock);
4988 5109                  }
4989 5110          }
4990 5111          mutex_exit(&cp->cache_lock);
4991 5112          kmem_cache_free(kmem_move_cache, callback);
4992 5113  }
4993 5114  
4994 5115  /*
4995 5116   * Move buffers from least used slabs first by scanning backwards from the end
4996 5117   * of the partial slab list. Scan at most max_scan candidate slabs and move
↓ open down ↓ 124 lines elided ↑ open up ↑
5121 5242                                           * context where that is determined
5122 5243                                           * requires the slab to exist.
5123 5244                                           * Fortunately, a pending move also
5124 5245                                           * means we don't need to destroy the
5125 5246                                           * slab here, since it will get
5126 5247                                           * destroyed along with any other slabs
5127 5248                                           * on the deadlist after the last
5128 5249                                           * pending move completes.
5129 5250                                           */
5130 5251                                          list_insert_head(deadlist, sp);
     5252 +                                        KMEM_STAT_ADD(kmem_move_stats.
     5253 +                                            kms_endscan_slab_dead);
5131 5254                                          return (-1);
5132 5255                                  }
5133 5256  
5134 5257                                  /*
5135 5258                                   * Destroy the slab now if it was completely
5136 5259                                   * freed while we dropped cache_lock and there
5137 5260                                   * are no pending moves. Since slab_refcnt
5138 5261                                   * cannot change once it reaches zero, no new
5139 5262                                   * pending moves from that slab are possible.
5140 5263                                   */
5141 5264                                  cp->cache_defrag->kmd_deadcount--;
5142 5265                                  cp->cache_slab_destroy++;
5143 5266                                  mutex_exit(&cp->cache_lock);
5144 5267                                  kmem_slab_destroy(cp, sp);
     5268 +                                KMEM_STAT_ADD(kmem_move_stats.
     5269 +                                    kms_dead_slabs_freed);
     5270 +                                KMEM_STAT_ADD(kmem_move_stats.
     5271 +                                    kms_endscan_slab_destroyed);
5145 5272                                  mutex_enter(&cp->cache_lock);
5146 5273                                  /*
5147 5274                                   * Since we can't pick up the scan where we left
5148 5275                                   * off, abort the scan and say nothing about the
5149 5276                                   * number of reclaimable slabs.
5150 5277                                   */
5151 5278                                  return (-1);
5152 5279                          }
5153 5280  
5154 5281                          if (!success) {
5155 5282                                  /*
5156 5283                                   * Abort the scan if there is not enough memory
5157 5284                                   * for the request and say nothing about the
5158 5285                                   * number of reclaimable slabs.
5159 5286                                   */
     5287 +                                KMEM_STAT_COND_ADD(s < max_slabs,
     5288 +                                    kmem_move_stats.kms_endscan_nomem);
5160 5289                                  return (-1);
5161 5290                          }
5162 5291  
5163 5292                          /*
5164 5293                           * The slab's position changed while the lock was
5165 5294                           * dropped, so we don't know where we are in the
5166 5295                           * sequence any more.
5167 5296                           */
5168 5297                          if (sp->slab_refcnt != refcnt) {
5169 5298                                  /*
5170 5299                                   * If this is a KMM_DEBUG move, the slab_refcnt
5171 5300                                   * may have changed because we allocated a
5172 5301                                   * destination buffer on the same slab. In that
5173 5302                                   * case, we're not interested in counting it.
5174 5303                                   */
     5304 +                                KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
     5305 +                                    (s < max_slabs),
     5306 +                                    kmem_move_stats.kms_endscan_refcnt_changed);
5175 5307                                  return (-1);
5176 5308                          }
5177      -                        if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove)
     5309 +                        if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove) {
     5310 +                                KMEM_STAT_COND_ADD(s < max_slabs,
     5311 +                                    kmem_move_stats.kms_endscan_nomove_changed);
5178 5312                                  return (-1);
     5313 +                        }
5179 5314  
5180 5315                          /*
5181 5316                           * Generating a move request allocates a destination
5182 5317                           * buffer from the slab layer, bumping the first partial
5183 5318                           * slab if it is completely allocated. If the current
5184 5319                           * slab becomes the first partial slab as a result, we
5185 5320                           * can't continue to scan backwards.
5186 5321                           *
5187 5322                           * If this is a KMM_DEBUG move and we allocated the
5188 5323                           * destination buffer from the last partial slab, then
↓ open down ↓ 6 lines elided ↑ open up ↑
5195 5330                                  /*
5196 5331                                   * We're not interested in a second KMM_DEBUG
5197 5332                                   * move.
5198 5333                                   */
5199 5334                                  goto end_scan;
5200 5335                          }
5201 5336                  }
5202 5337          }
5203 5338  end_scan:
5204 5339  
     5340 +        KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
     5341 +            (s < max_slabs) &&
     5342 +            (sp == avl_first(&cp->cache_partial_slabs)),
     5343 +            kmem_move_stats.kms_endscan_freelist);
     5344 +
5205 5345          return (s);
5206 5346  }
5207 5347  
5208 5348  typedef struct kmem_move_notify_args {
5209 5349          kmem_cache_t *kmna_cache;
5210 5350          void *kmna_buf;
5211 5351  } kmem_move_notify_args_t;
5212 5352  
5213 5353  static void
5214 5354  kmem_cache_move_notify_task(void *arg)
↓ open down ↓ 39 lines elided ↑ open up ↑
5254 5394                  ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
5255 5395                  sp->slab_flags &= ~KMEM_SLAB_MOVE_PENDING;
5256 5396                  if (sp->slab_refcnt == 0) {
5257 5397                          list_t *deadlist = &cp->cache_defrag->kmd_deadlist;
5258 5398                          list_remove(deadlist, sp);
5259 5399  
5260 5400                          if (!avl_is_empty(
5261 5401                              &cp->cache_defrag->kmd_moves_pending)) {
5262 5402                                  list_insert_head(deadlist, sp);
5263 5403                                  mutex_exit(&cp->cache_lock);
     5404 +                                KMEM_STAT_ADD(kmem_move_stats.
     5405 +                                    kms_notify_slab_dead);
5264 5406                                  return;
5265 5407                          }
5266 5408  
5267 5409                          cp->cache_defrag->kmd_deadcount--;
5268 5410                          cp->cache_slab_destroy++;
5269 5411                          mutex_exit(&cp->cache_lock);
5270 5412                          kmem_slab_destroy(cp, sp);
     5413 +                        KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
     5414 +                        KMEM_STAT_ADD(kmem_move_stats.
     5415 +                            kms_notify_slab_destroyed);
5271 5416                          return;
5272 5417                  }
5273 5418          } else {
5274 5419                  kmem_slab_move_yes(cp, sp, buf);
5275 5420          }
5276 5421          mutex_exit(&cp->cache_lock);
5277 5422  }
5278 5423  
5279 5424  void
5280 5425  kmem_cache_move_notify(kmem_cache_t *cp, void *buf)
5281 5426  {
5282 5427          kmem_move_notify_args_t *args;
5283 5428  
     5429 +        KMEM_STAT_ADD(kmem_move_stats.kms_notify);
5284 5430          args = kmem_alloc(sizeof (kmem_move_notify_args_t), KM_NOSLEEP);
5285 5431          if (args != NULL) {
5286 5432                  args->kmna_cache = cp;
5287 5433                  args->kmna_buf = buf;
5288 5434                  if (!taskq_dispatch(kmem_taskq,
5289 5435                      (task_func_t *)kmem_cache_move_notify_task, args,
5290 5436                      TQ_NOSLEEP))
5291 5437                          kmem_free(args, sizeof (kmem_move_notify_args_t));
5292 5438          }
5293 5439  }
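For completeness, a sketch of how a client might use the notification entry point above once an object it previously answered KMEM_CBRC_LATER for becomes movable again; the object_cache_t and object_t types and their fields are hypothetical.

    static void
    object_release(object_cache_t *ocp, object_t *op)
    {
            op->o_busy = B_FALSE;

            /*
             * Let kmem know the object is movable again so the consolidator
             * can retry it without waiting for the buffer to be freed.
             */
            kmem_cache_move_notify(ocp->oc_cache, op);
    }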
↓ open down ↓ 2 lines elided ↑ open up ↑
5296 5442  kmem_cache_defrag(kmem_cache_t *cp)
5297 5443  {
5298 5444          size_t n;
5299 5445  
5300 5446          ASSERT(cp->cache_defrag != NULL);
5301 5447  
5302 5448          mutex_enter(&cp->cache_lock);
5303 5449          n = avl_numnodes(&cp->cache_partial_slabs);
5304 5450          if (n > 1) {
5305 5451                  /* kmem_move_buffers() drops and reacquires cache_lock */
     5452 +                KMEM_STAT_ADD(kmem_move_stats.kms_defrags);
5306 5453                  cp->cache_defrag->kmd_defrags++;
5307 5454                  (void) kmem_move_buffers(cp, n, 0, KMM_DESPERATE);
5308 5455          }
5309 5456          mutex_exit(&cp->cache_lock);
5310 5457  }
5311 5458  
5312 5459  /* Is this cache above the fragmentation threshold? */
5313 5460  static boolean_t
5314 5461  kmem_cache_frag_threshold(kmem_cache_t *cp, uint64_t nfree)
5315 5462  {
↓ open down ↓ 78 lines elided ↑ open up ↑
5394 5541                  /*
5395 5542                   * Consolidate reclaimable slabs from the end of the partial
5396 5543                   * slab list (scan at most kmem_reclaim_scan_range slabs to find
5397 5544                   * reclaimable slabs). Keep track of how many candidate slabs we
5398 5545                   * looked for and how many we actually found so we can adjust
5399 5546                   * the definition of a candidate slab if we're having trouble
5400 5547                   * finding them.
5401 5548                   *
5402 5549                   * kmem_move_buffers() drops and reacquires cache_lock.
5403 5550                   */
     5551 +                KMEM_STAT_ADD(kmem_move_stats.kms_scans);
5404 5552                  kmd->kmd_scans++;
5405 5553                  slabs_found = kmem_move_buffers(cp, kmem_reclaim_scan_range,
5406 5554                      kmem_reclaim_max_slabs, 0);
5407 5555                  if (slabs_found >= 0) {
5408 5556                          kmd->kmd_slabs_sought += kmem_reclaim_max_slabs;
5409 5557                          kmd->kmd_slabs_found += slabs_found;
5410 5558                  }
5411 5559  
5412 5560                  if (++kmd->kmd_tries >= kmem_reclaim_scan_range) {
5413 5561                          kmd->kmd_tries = 0;
↓ open down ↓ 20 lines elided ↑ open up ↑
5434 5582                           * In a debug kernel we want the consolidator to
5435 5583                           * run occasionally even when there is plenty of
5436 5584                           * memory.
5437 5585                           */
5438 5586                          uint16_t debug_rand;
5439 5587  
5440 5588                          (void) random_get_bytes((uint8_t *)&debug_rand, 2);
5441 5589                          if (!kmem_move_noreap &&
5442 5590                              ((debug_rand % kmem_mtb_reap) == 0)) {
5443 5591                                  mutex_exit(&cp->cache_lock);
     5592 +                                KMEM_STAT_ADD(kmem_move_stats.kms_debug_reaps);
5444 5593                                  kmem_cache_reap(cp);
5445 5594                                  return;
5446 5595                          } else if ((debug_rand % kmem_mtb_move) == 0) {
     5596 +                                KMEM_STAT_ADD(kmem_move_stats.kms_scans);
     5597 +                                KMEM_STAT_ADD(kmem_move_stats.kms_debug_scans);
5447 5598                                  kmd->kmd_scans++;
5448 5599                                  (void) kmem_move_buffers(cp,
5449 5600                                      kmem_reclaim_scan_range, 1, KMM_DEBUG);
5450 5601                          }
5451 5602                  }
5452 5603  #endif  /* DEBUG */
5453 5604          }
5454 5605  
5455 5606          mutex_exit(&cp->cache_lock);
5456 5607  
5457      -        if (reap)
     5608 +        if (reap) {
     5609 +                KMEM_STAT_ADD(kmem_move_stats.kms_scan_depot_ws_reaps);
5458 5610                  kmem_depot_ws_reap(cp);
     5611 +        }
5459 5612  }
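A note on the DEBUG-only branch above: assuming debug_rand is uniform and ignoring the interaction between the two modulus tests,

        P(debug reap on a maintenance pass)     ~ 1 / kmem_mtb_reap
        P(debug move scan on a pass)            ~ 1 / kmem_mtb_move

so on a DEBUG kernel even an unfragmented cache is occasionally reaped or given a KMM_DEBUG scan, keeping the consolidator code paths exercised when memory is plentiful (the defaults for the two tunables are elided above).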
    