Print this page
OS-6363 system went to dark side of moon for ~467 seconds OS-6404 ARC reclaim should throttle its calls to arc_kmem_reap_now() Reviewed by: Bryan Cantrill <bryan@joyent.com> Reviewed by: Dan McDonald <danmcd@joyent.com>
@@ -18,11 +18,11 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
@@ -299,10 +299,13 @@
int zfs_arc_evict_batch_limit = 10;
/* number of seconds before growing cache again */
static int arc_grow_retry = 60;
+/* number of milliseconds before attempting a kmem-cache-reap */
+static int arc_kmem_cache_reap_retry_ms = 1000;
+
/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
int zfs_arc_overflow_shift = 8;
/* shift of arc_c for calculating both min and max arc_p */
static int arc_p_min_shift = 4;
@@ -4045,25 +4048,35 @@
*/
kmem_reap();
#endif
#endif
+ /*
+ * If a kmem reap is already active, don't schedule more. We must
+ * check for this because kmem_cache_reap_soon() won't actually
+ * block on the cache being reaped (this is to prevent callers from
+ * becoming implicitly blocked by a system-wide kmem reap -- which,
+ * on a system with many, many full magazines, can take minutes).
+ */
+ if (kmem_cache_reap_active())
+ return;
+
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
if (zio_buf_cache[i] != prev_cache) {
prev_cache = zio_buf_cache[i];
- kmem_cache_reap_now(zio_buf_cache[i]);
+ kmem_cache_reap_soon(zio_buf_cache[i]);
}
if (zio_data_buf_cache[i] != prev_data_cache) {
prev_data_cache = zio_data_buf_cache[i];
- kmem_cache_reap_now(zio_data_buf_cache[i]);
+ kmem_cache_reap_soon(zio_data_buf_cache[i]);
}
}
- kmem_cache_reap_now(abd_chunk_cache);
- kmem_cache_reap_now(buf_cache);
- kmem_cache_reap_now(hdr_full_cache);
- kmem_cache_reap_now(hdr_l2only_cache);
- kmem_cache_reap_now(range_seg_cache);
+ kmem_cache_reap_soon(abd_chunk_cache);
+ kmem_cache_reap_soon(buf_cache);
+ kmem_cache_reap_soon(hdr_full_cache);
+ kmem_cache_reap_soon(hdr_l2only_cache);
+ kmem_cache_reap_soon(range_seg_cache);
if (zio_arena != NULL) {
/*
* Ask the vmem arena to reclaim unused memory from its
* quantum caches.
@@ -4091,10 +4104,11 @@
/* ARGSUSED */
static void
arc_reclaim_thread(void *unused)
{
hrtime_t growtime = 0;
+ hrtime_t kmem_reap_time = 0;
callb_cpr_t cpr;
CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
mutex_enter(&arc_reclaim_lock);
@@ -4124,21 +4138,32 @@
*/
evicted = arc_adjust();
int64_t free_memory = arc_available_memory();
if (free_memory < 0) {
-
+ hrtime_t curtime = gethrtime();
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
/*
* Wait at least zfs_grow_retry (default 60) seconds
* before considering growing.
*/
- growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+ growtime = curtime + SEC2NSEC(arc_grow_retry);
+ /*
+ * Wait at least arc_kmem_cache_reap_retry_ms
+ * between arc_kmem_reap_now() calls. Without
+ * this check it is possible to end up in a
+ * situation where we spend lots of time
+ * reaping caches, while we're near arc_c_min.
+ */
+ if (curtime >= kmem_reap_time) {
arc_kmem_reap_now();
+ kmem_reap_time = gethrtime() +
+ MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
+ }
/*
* If we are still low on memory, shrink the ARC
* so that we have arc_shrink_min free space.
*/