11927 Log, or optionally panic, on zero-length kmem allocations
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Jason King <jason.brian.king@gmail.com>

usr/src/uts/common/os/kmem.c, old version (before the change):

 994         { 95,   64,     0,      512     },
 995         { 143,  64,     0,      0       },
 996 };
 997 
 998 static uint32_t kmem_reaping;
 999 static uint32_t kmem_reaping_idspace;
1000 
1001 /*
1002  * kmem tunables
1003  */
1004 clock_t kmem_reap_interval;     /* cache reaping rate [15 * HZ ticks] */
1005 int kmem_depot_contention = 3;  /* max failed tryenters per real interval */
1006 pgcnt_t kmem_reapahead = 0;     /* start reaping N pages before pageout */
1007 int kmem_panic = 1;             /* whether to panic on error */
1008 int kmem_logging = 1;           /* kmem_log_enter() override */
1009 uint32_t kmem_mtbf = 0;         /* mean time between failures [default: off] */
1010 size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
1011 size_t kmem_content_log_size;   /* content log size [2% of memory] */
1012 size_t kmem_failure_log_size;   /* failure log [4 pages per CPU] */
1013 size_t kmem_slab_log_size;      /* slab create log [4 pages per CPU] */
1014 size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
1015 size_t kmem_lite_minsize = 0;   /* minimum buffer size for KMF_LITE */
1016 size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
1017 int kmem_lite_pcs = 4;          /* number of PCs to store in KMF_LITE mode */
1018 size_t kmem_maxverify;          /* maximum bytes to inspect in debug routines */
1019 size_t kmem_minfirewall;        /* hardware-enforced redzone threshold */
1020 
1021 #ifdef _LP64
1022 size_t  kmem_max_cached = KMEM_BIG_MAXBUF;      /* maximum kmem_alloc cache */
1023 #else
1024 size_t  kmem_max_cached = KMEM_BIG_MAXBUF_32BIT; /* maximum kmem_alloc cache */
1025 #endif
1026 
1027 #ifdef DEBUG
1028 int kmem_flags = KMF_AUDIT | KMF_DEADBEEF | KMF_REDZONE | KMF_CONTENTS;
1029 #else
1030 int kmem_flags = 0;
1031 #endif
1032 int kmem_ready;
1033 
1034 static kmem_cache_t     *kmem_slab_cache;
1035 static kmem_cache_t     *kmem_bufctl_cache;
1036 static kmem_cache_t     *kmem_bufctl_audit_cache;
1037 
1038 static kmutex_t         kmem_cache_lock;        /* inter-cache linkage only */
1039 static list_t           kmem_caches;
1040 
1041 static taskq_t          *kmem_taskq;
1042 static kmutex_t         kmem_flags_lock;
1043 static vmem_t           *kmem_metadata_arena;
1044 static vmem_t           *kmem_msb_arena;        /* arena for metadata caches */
1045 static vmem_t           *kmem_cache_arena;
1046 static vmem_t           *kmem_hash_arena;
1047 static vmem_t           *kmem_log_arena;
1048 static vmem_t           *kmem_oversize_arena;
1049 static vmem_t           *kmem_va_arena;
1050 static vmem_t           *kmem_default_arena;
1051 static vmem_t           *kmem_firewall_va_arena;
1052 static vmem_t           *kmem_firewall_arena;
1053 
1054 /*
1055  * kmem slab consolidator thresholds (tunables)
1056  */
1057 size_t kmem_frag_minslabs = 101;        /* minimum total slabs */
1058 size_t kmem_frag_numer = 1;             /* free buffers (numerator) */
1059 size_t kmem_frag_denom = KMEM_VOID_FRACTION; /* buffers (denominator) */
1060 /*
1061  * Maximum number of slabs from which to move buffers during a single
1062  * maintenance interval while the system is not low on memory.
1063  */
1064 size_t kmem_reclaim_max_slabs = 1;
1065 /*
1066  * Number of slabs to scan backwards from the end of the partial slab list
1067  * when searching for buffers to relocate.
1068  */
1069 size_t kmem_reclaim_scan_range = 12;
1070 
1071 /* consolidator knobs */
1072 boolean_t kmem_move_noreap;
1073 boolean_t kmem_move_blocked;
1074 boolean_t kmem_move_fulltilt;
1075 boolean_t kmem_move_any_partial; /* debug */
1076 
1077 #ifdef DEBUG
1078 /*
1079  * kmem consolidator debug tunables:
1080  * Ensure code coverage by occasionally running the consolidator even when the
1081  * caches are not fragmented (they may never be). These intervals are mean time
1082  * in cache maintenance intervals (kmem_cache_update).
1083  */
1084 uint32_t kmem_mtb_move = 60;    /* defrag 1 slab (~15min) */
1085 uint32_t kmem_mtb_reap = 1800;  /* defrag all slabs (~7.5hrs) */
1086 #endif  /* DEBUG */
1087 
1088 static kmem_cache_t     *kmem_defrag_cache;
1089 static kmem_cache_t     *kmem_move_cache;
1090 static taskq_t          *kmem_move_taskq;
1091 
1092 static void kmem_cache_scan(kmem_cache_t *);
1093 static void kmem_cache_defrag(kmem_cache_t *);
1094 static void kmem_slab_prefill(kmem_cache_t *, kmem_slab_t *);
1095 
1096 
1097 kmem_log_header_t       *kmem_transaction_log;
1098 kmem_log_header_t       *kmem_content_log;
1099 kmem_log_header_t       *kmem_failure_log;
1100 kmem_log_header_t       *kmem_slab_log;
1101 
1102 static int              kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
1103 
1104 #define KMEM_BUFTAG_LITE_ENTER(bt, count, caller)                       \
1105         if ((count) > 0) {                                           \
1106                 pc_t *_s = ((kmem_buftag_lite_t *)(bt))->bt_history; \
1107                 pc_t *_e;                                               \
1108                 /* memmove() the old entries down one notch */          \
1109                 for (_e = &_s[(count) - 1]; _e > _s; _e--)               \
1110                         *_e = *(_e - 1);                                \
1111                 *_s = (uintptr_t)(caller);                              \
1112         }
1113 
1114 #define KMERR_MODIFIED  0       /* buffer modified while on freelist */
1115 #define KMERR_REDZONE   1       /* redzone violation (write past end of buf) */
1116 #define KMERR_DUPFREE   2       /* freed a buffer twice */
1117 #define KMERR_BADADDR   3       /* freed a bad (unallocated) address */
1118 #define KMERR_BADBUFTAG 4       /* buftag corrupted */
1119 #define KMERR_BADBUFCTL 5       /* bufctl corrupted */
1120 #define KMERR_BADCACHE  6       /* freed a buffer to the wrong cache */


2836         return (buf);
2837 }
2838 
2839 void *
2840 kmem_alloc(size_t size, int kmflag)
2841 {
2842         size_t index;
2843         kmem_cache_t *cp;
2844         void *buf;
2845 
2846         if ((index = ((size - 1) >> KMEM_ALIGN_SHIFT)) < KMEM_ALLOC_TABLE_MAX) {
2847                 cp = kmem_alloc_table[index];
2848                 /* fall through to kmem_cache_alloc() */
2849 
2850         } else if ((index = ((size - 1) >> KMEM_BIG_SHIFT)) <
2851             kmem_big_alloc_table_max) {
2852                 cp = kmem_big_alloc_table[index];
2853                 /* fall through to kmem_cache_alloc() */
2854 
2855         } else {
2856                 if (size == 0)
2857                         return (NULL);
2858 
2859                 buf = vmem_alloc(kmem_oversize_arena, size,
2860                     kmflag & KM_VMFLAGS);
2861                 if (buf == NULL)
2862                         kmem_log_event(kmem_failure_log, NULL, NULL,
2863                             (void *)size);
2864                 else if (KMEM_DUMP(kmem_slab_cache)) {
2865                         /* stats for dump intercept */
2866                         kmem_dump_oversize_allocs++;
2867                         if (size > kmem_dump_oversize_max)
2868                                 kmem_dump_oversize_max = size;
2869                 }
2870                 return (buf);
2871         }
2872 
2873         buf = kmem_cache_alloc(cp, kmflag);
2874         if ((cp->cache_flags & KMF_BUFTAG) && !KMEM_DUMP(cp) && buf != NULL) {
2875                 kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf);
2876                 ((uint8_t *)buf)[size] = KMEM_REDZONE_BYTE;
2877                 ((uint32_t *)btp)[1] = KMEM_SIZE_ENCODE(size);
2878 
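
A worked example of the table lookup at the top of kmem_alloc() above, assuming the usual kmem_impl.h definitions (KMEM_ALIGN_SHIFT == 3, i.e. 8-byte buckets):

    size_t size = 100;
    size_t index = (size - 1) >> KMEM_ALIGN_SHIFT;  /* 99 >> 3 == 12 */
    /*
     * Slot 12 covers sizes 97..104; with the stock cache sizes it
     * points at kmem_alloc_112, the smallest cache that fits the
     * request.
     */

Note in passing the old behavior shown in this version: kmem_alloc(0, KM_SLEEP) simply returns NULL with no record of the event, which is what the new version below replaces with logging.
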


4380                     segkmem_alloc, segkmem_free, kmem_minfirewall < ULONG_MAX ?
4381                     kmem_firewall_va_arena : heap_arena, 0, VMC_DUMPSAFE |
4382                     VM_SLEEP);
4383         }
4384 
4385         kmem_cache_init(2, use_large_pages);
4386 
4387         if (kmem_flags & (KMF_AUDIT | KMF_RANDOMIZE)) {
4388                 if (kmem_transaction_log_size == 0)
4389                         kmem_transaction_log_size = kmem_maxavail() / 50;
4390                 kmem_transaction_log = kmem_log_init(kmem_transaction_log_size);
4391         }
4392 
4393         if (kmem_flags & (KMF_CONTENTS | KMF_RANDOMIZE)) {
4394                 if (kmem_content_log_size == 0)
4395                         kmem_content_log_size = kmem_maxavail() / 50;
4396                 kmem_content_log = kmem_log_init(kmem_content_log_size);
4397         }
4398 
4399         kmem_failure_log = kmem_log_init(kmem_failure_log_size);
4400 
4401         kmem_slab_log = kmem_log_init(kmem_slab_log_size);
4402 
4403         /*
4404          * Initialize STREAMS message caches so allocb() is available.
4405          * This allows us to initialize the logging framework (cmn_err(9F),
4406          * strlog(9F), etc) so we can start recording messages.
4407          */
4408         streams_msg_init();
4409 
4410         /*
4411          * Initialize the ZSD framework in Zones so modules loaded henceforth
4412          * can register their callbacks.
4413          */
4414         zone_zsd_init();
4415 
4416         log_init();
4417         taskq_init();
4418 
4419         /*
4420          * Warn about invalid or dangerous values of kmem_flags.
4421          * Always warn about unsupported values.

usr/src/uts/common/os/kmem.c, new version (with the change):

 994         { 95,   64,     0,      512     },
 995         { 143,  64,     0,      0       },
 996 };
 997 
 998 static uint32_t kmem_reaping;
 999 static uint32_t kmem_reaping_idspace;
1000 
1001 /*
1002  * kmem tunables
1003  */
1004 clock_t kmem_reap_interval;     /* cache reaping rate [15 * HZ ticks] */
1005 int kmem_depot_contention = 3;  /* max failed tryenters per real interval */
1006 pgcnt_t kmem_reapahead = 0;     /* start reaping N pages before pageout */
1007 int kmem_panic = 1;             /* whether to panic on error */
1008 int kmem_logging = 1;           /* kmem_log_enter() override */
1009 uint32_t kmem_mtbf = 0;         /* mean time between failures [default: off] */
1010 size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
1011 size_t kmem_content_log_size;   /* content log size [2% of memory] */
1012 size_t kmem_failure_log_size;   /* failure log [4 pages per CPU] */
1013 size_t kmem_slab_log_size;      /* slab create log [4 pages per CPU] */
1014 size_t kmem_zerosized_log_size; /* zero-sized log [4 pages per CPU] */
1015 size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
1016 size_t kmem_lite_minsize = 0;   /* minimum buffer size for KMF_LITE */
1017 size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
1018 int kmem_lite_pcs = 4;          /* number of PCs to store in KMF_LITE mode */
1019 size_t kmem_maxverify;          /* maximum bytes to inspect in debug routines */
1020 size_t kmem_minfirewall;        /* hardware-enforced redzone threshold */
1021 
1022 #ifdef DEBUG
1023 int kmem_warn_zerosized = 1;    /* whether to warn on zero-sized KM_SLEEP */
1024 #else
1025 int kmem_warn_zerosized = 0;    /* whether to warn on zero-sized KM_SLEEP */
1026 #endif
1027 
1028 int kmem_panic_zerosized = 0;   /* whether to panic on zero-sized KM_SLEEP */
1029 
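
Like other kmem tunables, these can be set at boot time via /etc/system; an illustrative fragment:

    set kmem_warn_zerosized = 1
    set kmem_panic_zerosized = 1

Note that kmem_panic must also remain at its default of 1 for the panic to actually fire, per the kmem_panic && kmem_panic_zerosized check in kmem_alloc() below.
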
1030 #ifdef _LP64
1031 size_t  kmem_max_cached = KMEM_BIG_MAXBUF;      /* maximum kmem_alloc cache */
1032 #else
1033 size_t  kmem_max_cached = KMEM_BIG_MAXBUF_32BIT; /* maximum kmem_alloc cache */
1034 #endif
1035 
1036 #ifdef DEBUG
1037 int kmem_flags = KMF_AUDIT | KMF_DEADBEEF | KMF_REDZONE | KMF_CONTENTS;
1038 #else
1039 int kmem_flags = 0;
1040 #endif
1041 int kmem_ready;
1042 
1043 static kmem_cache_t     *kmem_slab_cache;
1044 static kmem_cache_t     *kmem_bufctl_cache;
1045 static kmem_cache_t     *kmem_bufctl_audit_cache;
1046 
1047 static kmutex_t         kmem_cache_lock;        /* inter-cache linkage only */
1048 static list_t           kmem_caches;
1049 
1050 static taskq_t          *kmem_taskq;
1051 static kmutex_t         kmem_flags_lock;
1052 static vmem_t           *kmem_metadata_arena;
1053 static vmem_t           *kmem_msb_arena;        /* arena for metadata caches */
1054 static vmem_t           *kmem_cache_arena;
1055 static vmem_t           *kmem_hash_arena;
1056 static vmem_t           *kmem_log_arena;
1057 static vmem_t           *kmem_oversize_arena;
1058 static vmem_t           *kmem_va_arena;
1059 static vmem_t           *kmem_default_arena;
1060 static vmem_t           *kmem_firewall_va_arena;
1061 static vmem_t           *kmem_firewall_arena;
1062 
1063 static int              kmem_zerosized;         /* # of zero-sized allocs */
1064 
1065 /*
1066  * kmem slab consolidator thresholds (tunables)
1067  */
1068 size_t kmem_frag_minslabs = 101;        /* minimum total slabs */
1069 size_t kmem_frag_numer = 1;             /* free buffers (numerator) */
1070 size_t kmem_frag_denom = KMEM_VOID_FRACTION; /* buffers (denominator) */
1071 /*
1072  * Maximum number of slabs from which to move buffers during a single
1073  * maintenance interval while the system is not low on memory.
1074  */
1075 size_t kmem_reclaim_max_slabs = 1;
1076 /*
1077  * Number of slabs to scan backwards from the end of the partial slab list
1078  * when searching for buffers to relocate.
1079  */
1080 size_t kmem_reclaim_scan_range = 12;
1081 
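
The three thresholds above combine into the consolidator's fragmentation test. A self-contained sketch of that test (names here are illustrative rather than the kernel's exact functions, and KMEM_VOID_FRACTION is assumed to be 8):

    #include <stddef.h>
    #include <stdint.h>

    static size_t kmem_frag_minslabs = 101;
    static size_t kmem_frag_numer = 1;
    static size_t kmem_frag_denom = 8;      /* assumed KMEM_VOID_FRACTION */

    /* fragmented iff big enough and nfree/buftotal > numer/denom */
    static int
    cache_is_fragmented(size_t nslabs, uint64_t buftotal, uint64_t nfree)
    {
            if (nslabs < kmem_frag_minslabs)
                    return (0);

            /* cross-multiplied to avoid integer division */
            return (nfree * kmem_frag_denom > buftotal * kmem_frag_numer);
    }
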
1082 /* consolidator knobs */
1083 boolean_t kmem_move_noreap;
1084 boolean_t kmem_move_blocked;
1085 boolean_t kmem_move_fulltilt;
1086 boolean_t kmem_move_any_partial; /* debug */
1087 
1088 #ifdef DEBUG
1089 /*
1090  * kmem consolidator debug tunables:
1091  * Ensure code coverage by occasionally running the consolidator even when the
1092  * caches are not fragmented (they may never be). These intervals are mean time
1093  * in cache maintenance intervals (kmem_cache_update).
1094  */
1095 uint32_t kmem_mtb_move = 60;    /* defrag 1 slab (~15min) */
1096 uint32_t kmem_mtb_reap = 1800;  /* defrag all slabs (~7.5hrs) */
1097 #endif  /* DEBUG */
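
(Arithmetic behind the two comments above: cache maintenance runs once per kmem_reap_interval, 15 seconds by default, so a mean of 60 intervals is ~15 minutes and 1800 intervals is ~7.5 hours.)
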
1098 
1099 static kmem_cache_t     *kmem_defrag_cache;
1100 static kmem_cache_t     *kmem_move_cache;
1101 static taskq_t          *kmem_move_taskq;
1102 
1103 static void kmem_cache_scan(kmem_cache_t *);
1104 static void kmem_cache_defrag(kmem_cache_t *);
1105 static void kmem_slab_prefill(kmem_cache_t *, kmem_slab_t *);
1106 
1107 
1108 kmem_log_header_t       *kmem_transaction_log;
1109 kmem_log_header_t       *kmem_content_log;
1110 kmem_log_header_t       *kmem_failure_log;
1111 kmem_log_header_t       *kmem_slab_log;
1112 kmem_log_header_t       *kmem_zerosized_log;
1113 
1114 static int              kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
1115 
1116 #define KMEM_BUFTAG_LITE_ENTER(bt, count, caller)                       \
1117         if ((count) > 0) {                                           \
1118                 pc_t *_s = ((kmem_buftag_lite_t *)(bt))->bt_history; \
1119                 pc_t *_e;                                               \
1120                 /* memmove() the old entries down one notch */          \
1121                 for (_e = &_s[(count) - 1]; _e > _s; _e--)               \
1122                         *_e = *(_e - 1);                                \
1123                 *_s = (uintptr_t)(caller);                              \
1124         }
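
A userland sketch of what KMEM_BUFTAG_LITE_ENTER does (names here are illustrative, not the kernel's): bt_history is kept as a newest-first array of program counters, with the old entries shifted down one slot on each entry.

    #include <stdint.h>
    #include <string.h>

    typedef uintptr_t pc_t;

    static void
    buftag_lite_enter(pc_t *history, int count, pc_t caller)
    {
            if (count > 0) {
                    /* slide the old entries down one notch */
                    memmove(&history[1], &history[0],
                        (count - 1) * sizeof (pc_t));
                    /* record the newest caller at the front */
                    history[0] = caller;
            }
    }
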
1125 
1126 #define KMERR_MODIFIED  0       /* buffer modified while on freelist */
1127 #define KMERR_REDZONE   1       /* redzone violation (write past end of buf) */
1128 #define KMERR_DUPFREE   2       /* freed a buffer twice */
1129 #define KMERR_BADADDR   3       /* freed a bad (unallocated) address */
1130 #define KMERR_BADBUFTAG 4       /* buftag corrupted */
1131 #define KMERR_BADBUFCTL 5       /* bufctl corrupted */
1132 #define KMERR_BADCACHE  6       /* freed a buffer to the wrong cache */


2848         return (buf);
2849 }
2850 
2851 void *
2852 kmem_alloc(size_t size, int kmflag)
2853 {
2854         size_t index;
2855         kmem_cache_t *cp;
2856         void *buf;
2857 
2858         if ((index = ((size - 1) >> KMEM_ALIGN_SHIFT)) < KMEM_ALLOC_TABLE_MAX) {
2859                 cp = kmem_alloc_table[index];
2860                 /* fall through to kmem_cache_alloc() */
2861 
2862         } else if ((index = ((size - 1) >> KMEM_BIG_SHIFT)) <
2863             kmem_big_alloc_table_max) {
2864                 cp = kmem_big_alloc_table[index];
2865                 /* fall through to kmem_cache_alloc() */
2866 
2867         } else {
2868                 if (size == 0) {
2869                         if (kmflag != KM_SLEEP && !(kmflag & KM_PANIC))
2870                                 return (NULL);
2871 
2872                         /*
2873                          * If this is a sleeping allocation or one that has
2874                          * been specified to panic on allocation failure, we
2875                          * consider it to be deprecated behavior to allocate
2876                          * 0 bytes.  If we have been configured to panic under
2877                          * this condition, we panic; if to warn, we warn -- and
2878                          * regardless, we log to the kmem_zerosized_log that
2879                          * this condition has occurred (which gives us
2880                          * enough information to be able to debug it).
2881                          */
2882                         if (kmem_panic && kmem_panic_zerosized)
2883                                 panic("attempted to kmem_alloc() size of 0");
2884 
2885                         if (kmem_warn_zerosized) {
2886                                 cmn_err(CE_WARN, "kmem_alloc(): sleeping "
2887                                     "allocation with size of 0; "
2888                                     "see kmem_zerosized_log for details");
2889                         }
2890 
2891                         kmem_log_event(kmem_zerosized_log, NULL, NULL, NULL);
2892 
2893                         return (NULL);
2894                 }
2895 
2896                 buf = vmem_alloc(kmem_oversize_arena, size,
2897                     kmflag & KM_VMFLAGS);
2898                 if (buf == NULL)
2899                         kmem_log_event(kmem_failure_log, NULL, NULL,
2900                             (void *)size);
2901                 else if (KMEM_DUMP(kmem_slab_cache)) {
2902                         /* stats for dump intercept */
2903                         kmem_dump_oversize_allocs++;
2904                         if (size > kmem_dump_oversize_max)
2905                                 kmem_dump_oversize_max = size;
2906                 }
2907                 return (buf);
2908         }
2909 
2910         buf = kmem_cache_alloc(cp, kmflag);
2911         if ((cp->cache_flags & KMF_BUFTAG) && !KMEM_DUMP(cp) && buf != NULL) {
2912                 kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf);
2913                 ((uint8_t *)buf)[size] = KMEM_REDZONE_BYTE;
2914                 ((uint32_t *)btp)[1] = KMEM_SIZE_ENCODE(size);
2915 
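
A caller-visible summary of the new zero-size handling, as a short sketch (KM_SLEEP, KM_NOSLEEP and KM_PANIC are the standard kmem flags; KM_PANIC allocations take the same deprecated path as KM_SLEEP):

    void *p;

    p = kmem_alloc(0, KM_NOSLEEP);  /* NULL, silently -- unchanged */
    p = kmem_alloc(0, KM_SLEEP);    /*
                                     * NULL as well, but the event is now
                                     * logged to kmem_zerosized_log, a
                                     * warning is issued if
                                     * kmem_warn_zerosized, and a panic if
                                     * kmem_panic && kmem_panic_zerosized.
                                     */
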

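The tail of the kmem_alloc() excerpt stamps a redzone byte at buf[size] and encodes the requested size into the buftag. A sketch of that encoding, assuming the usual kmem_impl.h definitions (the constant 251 is an assumption here, not shown in this webrev):

    size_t size = 100;
    uint32_t enc = 251 * size + 1;        /* KMEM_SIZE_ENCODE(size) */
    size_t decoded = (enc - 1) / 251;     /* KMEM_SIZE_DECODE(enc) == 100 */
    int valid = (enc % 251 == 1);         /* KMEM_SIZE_VALID(enc) */

On free, the allocator can decode the original size and verify the byte at buf[size]; corruption surfaces as one of the KMERR_* errors listed above (e.g. KMERR_REDZONE).
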

4417                     segkmem_alloc, segkmem_free, kmem_minfirewall < ULONG_MAX ?
4418                     kmem_firewall_va_arena : heap_arena, 0, VMC_DUMPSAFE |
4419                     VM_SLEEP);
4420         }
4421 
4422         kmem_cache_init(2, use_large_pages);
4423 
4424         if (kmem_flags & (KMF_AUDIT | KMF_RANDOMIZE)) {
4425                 if (kmem_transaction_log_size == 0)
4426                         kmem_transaction_log_size = kmem_maxavail() / 50;
4427                 kmem_transaction_log = kmem_log_init(kmem_transaction_log_size);
4428         }
4429 
4430         if (kmem_flags & (KMF_CONTENTS | KMF_RANDOMIZE)) {
4431                 if (kmem_content_log_size == 0)
4432                         kmem_content_log_size = kmem_maxavail() / 50;
4433                 kmem_content_log = kmem_log_init(kmem_content_log_size);
4434         }
4435 
4436         kmem_failure_log = kmem_log_init(kmem_failure_log_size);
4437         kmem_slab_log = kmem_log_init(kmem_slab_log_size);
4438         kmem_zerosized_log = kmem_log_init(kmem_zerosized_log_size);
4439 
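
(kmem_maxavail() / 50 is the "2% of memory" default documented for the transaction and content log tunables earlier in the file.)
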
4440         /*
4441          * Initialize STREAMS message caches so allocb() is available.
4442          * This allows us to initialize the logging framework (cmn_err(9F),
4443          * strlog(9F), etc) so we can start recording messages.
4444          */
4445         streams_msg_init();
4446 
4447         /*
4448          * Initialize the ZSD framework in Zones so modules loaded henceforth
4449          * can register their callbacks.
4450          */
4451         zone_zsd_init();
4452 
4453         log_init();
4454         taskq_init();
4455 
4456         /*
4457          * Warn about invalid or dangerous values of kmem_flags.
4458          * Always warn about unsupported values.