1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/kmem.h>
34 #include <sys/vnode.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/swap.h>
37 #include <sys/sysmacros.h>
38 #include <sys/buf.h>
39 #include <sys/callb.h>
40 #include <sys/debug.h>
41 #include <vm/seg.h>
42 #include <sys/fs/swapnode.h>
43 #include <fs/fs_subr.h>
44 #include <sys/cmn_err.h>
45 #include <sys/mem_config.h>
46 #include <sys/atomic.h>
47
/*
 * Vnode operations template for swapfs vnodes.  It is not defined in this
 * file; swapinit() hands it to vn_make_ops() to build swap_vnodeops.
 */
extern const fs_operation_def_t swap_vnodeops_template[];
49
50 /*
51 * swapfs_minfree is the amount of physical memory (actually remaining
52 * availrmem) that we want to keep free for the rest of the system. This
 * means that swapfs can only grow to availrmem - swapfs_minfree. It
 * can be set either to a constant value or to a certain percentage of
 * installed physical memory. It is set in swapinit().
56 *
57 * Users who want to change the amount of memory that can be used as swap
58 * space should do so by setting swapfs_desfree at boot time,
59 * not swapfs_minfree.
60 */
61
/*
 * Tunables, all expressed in pages.  A value of zero at the time swapinit()
 * snapshots them means "use the default computed by swapfs_recalc()".
 */
pgcnt_t swapfs_desfree = 0;		/* user-settable tunable (see above) */
volatile pgcnt_t swapfs_minfree = 0;	/* memory swapfs must leave free */
volatile pgcnt_t swapfs_reserve = 0;	/* extra margin for non-priv procs */

#ifdef SWAPFS_DEBUG
/* Debug flag; only present when SWAPFS_DEBUG is defined. */
int swapfs_debug;
#endif /* SWAPFS_DEBUG */
69
70
/* Number of vnodes created so far by swapfs_getvp(). */
static int swapfs_vpcount;
/*
 * Serializes creation of swap_vnodes[] entries and all manipulation of the
 * async request pending list, free list and sw_pending_size in this file.
 */
static kmutex_t swapfs_lock;
/*
 * sw_ar is the array of async request structures allocated by swapinit();
 * sw_pendlist and sw_freelist are the heads of the singly linked (a_next)
 * pending and free lists.
 */
static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist;

static struct vnode **swap_vnodes; /* ptr's to swap vnodes */

static void swap_init_mem_config(void);

/*
 * Snapshot of the tunables taken by swapfs_recalc_save_initial(); every
 * swapfs_recalc() call starts from these values.
 */
static pgcnt_t initial_swapfs_desfree;
static pgcnt_t initial_swapfs_minfree;
static pgcnt_t initial_swapfs_reserve;

static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr);
84
/*
 * Record the current values of the three swapfs tunables so that later
 * calls to swapfs_recalc() can start from them.  Called from swapinit()
 * before the first recalculation.
 */
static void
swapfs_recalc_save_initial(void)
{
	initial_swapfs_desfree = swapfs_desfree;
	initial_swapfs_minfree = swapfs_minfree;
	initial_swapfs_reserve = swapfs_reserve;
}
92
93 static int
94 swapfs_recalc(pgcnt_t pgs)
95 {
96 pgcnt_t new_swapfs_desfree;
97 pgcnt_t new_swapfs_minfree;
98 pgcnt_t new_swapfs_reserve;
99
100 new_swapfs_desfree = initial_swapfs_desfree;
101 new_swapfs_minfree = initial_swapfs_minfree;
102 new_swapfs_reserve = initial_swapfs_reserve;
103
104 if (new_swapfs_desfree == 0)
105 new_swapfs_desfree = btopr(7 * 512 * 1024); /* 3-1/2Mb */;
106
107 if (new_swapfs_minfree == 0) {
108 /*
109 * We set this lower than we'd like here, 2Mb, because we
110 * always boot on swapfs. It's up to a safer value,
111 * swapfs_desfree, when/if we add physical swap devices
112 * in swapadd(). Users who want to change the amount of
113 * memory that can be used as swap space should do so by
114 * setting swapfs_desfree at boot time, not swapfs_minfree.
115 * However, swapfs_minfree is tunable by install as a
116 * workaround for bugid 1147463.
117 */
118 new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
119 }
120
121 /*
122 * priv processes can reserve memory as swap as long as availrmem
123 * remains greater than swapfs_minfree; in the case of non-priv
124 * processes, memory can be reserved as swap only if availrmem
125 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
126 * swapfs_reserve amount of memswap is not available to non-priv
127 * processes. This protects daemons such as automounter dying
128 * as a result of application processes eating away almost entire
129 * membased swap. This safeguard becomes useless if apps are run
130 * with root access.
131 *
132 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever
133 * is greater up to the limit of 128 MB.
134 */
135 if (new_swapfs_reserve == 0)
136 new_swapfs_reserve = MIN(btopr(128 * 1024 * 1024),
137 MAX(btopr(4 * 1024 * 1024), pgs >> 7));
138
139 /* Test basic numeric viability. */
140 if (new_swapfs_minfree > pgs)
141 return (0);
142
143 /* Equivalent test to anon_resvmem() check. */
144 if (availrmem < new_swapfs_minfree) {
145 /*
146 * If ism pages are being used, then there must be agreement
147 * between these two policies.
148 */
149 if ((availrmem > segspt_minfree) && (segspt_minfree > 0)) {
150 new_swapfs_minfree = segspt_minfree;
151 } else {
152 return (0);
153 }
154 }
155
156 swapfs_desfree = new_swapfs_desfree;
157 swapfs_minfree = new_swapfs_minfree;
158 swapfs_reserve = new_swapfs_reserve;
159
160 return (1);
161 }
162
163 /*ARGSUSED1*/
164 int
165 swapinit(int fstype, char *name)
166 { /* reserve for mp */
167 ssize_t sw_freelist_size = klustsize / PAGESIZE * 2;
168 int i, error;
169
170 static const fs_operation_def_t swap_vfsops[] = {
171 VFSNAME_SYNC, { .vfs_sync = swap_sync },
172 NULL, NULL
173 };
174
175 SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0);
176 mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL);
177
178 swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *),
179 KM_SLEEP);
180
181 swapfs_recalc_save_initial();
182 if (!swapfs_recalc(physmem))
183 cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)",
184 swapfs_minfree, physmem);
185
186 /*
187 * Arrange for a callback on memory size change.
188 */
189 swap_init_mem_config();
190
191 sw_ar = (struct async_reqs *)
192 kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP);
193
194 error = vfs_setfsops(fstype, swap_vfsops, NULL);
195 if (error != 0) {
196 cmn_err(CE_WARN, "swapinit: bad vfs ops template");
197 return (error);
198 }
199
200 error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops);
201 if (error != 0) {
202 (void) vfs_freevfsops_by_type(fstype);
203 cmn_err(CE_WARN, "swapinit: bad vnode ops template");
204 return (error);
205 }
206 sw_freelist = sw_ar;
207 for (i = 0; i < sw_freelist_size - 1; i++)
208 sw_ar[i].a_next = &sw_ar[i + 1];
209
210 return (0);
211 }
212
213 /*
214 * Get a swapfs vnode corresponding to the specified identifier.
215 */
216 struct vnode *
217 swapfs_getvp(ulong_t vidx)
218 {
219 struct vnode *vp;
220
221 vp = swap_vnodes[vidx];
222 if (vp) {
223 return (vp);
224 }
225
226 mutex_enter(&swapfs_lock);
227 vp = swap_vnodes[vidx];
228 if (vp == NULL) {
229 vp = vn_alloc(KM_SLEEP);
230 vn_setops(vp, swap_vnodeops);
231 vp->v_type = VREG;
232 vp->v_flag |= (VISSWAP|VISSWAPFS);
233 swap_vnodes[vidx] = vp;
234 swapfs_vpcount++;
235 }
236 mutex_exit(&swapfs_lock);
237 return (vp);
238 }
239
/*
 * NOTE(review): swap_lo is not referenced anywhere in this file; presumably
 * it is used by other swapfs code -- confirm before changing or removing.
 */
int swap_lo;
241
242 /*ARGSUSED*/
243 static int
244 swap_sync(struct vfs *vfsp, short flag, struct cred *cr)
245 {
246 struct vnode *vp;
247 int i;
248
249 if (!(flag & SYNC_ALL))
250 return (1);
251
252 /*
253 * assumes that we are the only one left to access this so that
254 * no need to use swapfs_lock (since it's staticly defined)
255 */
256 for (i = 0; i < MAX_SWAP_VNODES; i++) {
257 vp = swap_vnodes[i];
258 if (vp) {
259 VN_HOLD(vp);
260 (void) VOP_PUTPAGE(vp, (offset_t)0, 0,
261 (B_ASYNC | B_FREE), kcred, NULL);
262 VN_RELE(vp);
263 }
264 }
265 return (0);
266 }
267
/*
 * Defined elsewhere.  In this file it is adjusted by PAGESIZE, under
 * swapfs_lock, each time a request is added to or removed from sw_pendlist.
 */
extern int sw_pending_size;
269
270 /*
271 * Take an async request off the pending queue
272 */
273 struct async_reqs *
274 sw_getreq()
275 {
276 struct async_reqs *arg;
277
278 mutex_enter(&swapfs_lock);
279 arg = sw_pendlist;
280 if (arg) {
281 sw_pendlist = arg->a_next;
282 arg->a_next = NULL;
283 sw_pending_size -= PAGESIZE;
284 }
285 ASSERT(sw_pending_size >= 0);
286 mutex_exit(&swapfs_lock);
287 return (arg);
288 }
289
/*
 * Put an async request on the pending queue.
 *
 * A hold is taken on arg->a_vp first; sw_putfree() releases a hold on a_vp
 * when a request structure goes back on the free list.  The request is then
 * pushed on the head of sw_pendlist and sw_pending_size is increased by
 * PAGESIZE, both under swapfs_lock.
 */
void
sw_putreq(struct async_reqs *arg)
{
	/* Hold onto it */
	VN_HOLD(arg->a_vp);

	mutex_enter(&swapfs_lock);
	arg->a_next = sw_pendlist;
	sw_pendlist = arg;
	sw_pending_size += PAGESIZE;
	mutex_exit(&swapfs_lock);
}
305
/*
 * Put an async request back on the pending queue.
 *
 * Same queueing as sw_putreq() (push on the head of sw_pendlist, add
 * PAGESIZE to sw_pending_size, under swapfs_lock) but without taking
 * another hold on arg->a_vp.
 */
void
sw_putbackreq(struct async_reqs *arg)
{
	mutex_enter(&swapfs_lock);
	arg->a_next = sw_pendlist;
	sw_pendlist = arg;
	sw_pending_size += PAGESIZE;
	mutex_exit(&swapfs_lock);
}
318
319 /*
320 * Take an async request structure off the free list
321 */
322 struct async_reqs *
323 sw_getfree()
324 {
325 struct async_reqs *arg;
326
327 mutex_enter(&swapfs_lock);
328 arg = sw_freelist;
329 if (arg) {
330 sw_freelist = arg->a_next;
331 arg->a_next = NULL;
332 }
333 mutex_exit(&swapfs_lock);
334 return (arg);
335 }
336
/*
 * Put an async request structure on the free list.
 *
 * Releases a hold on arg->a_vp and then pushes the structure on the head of
 * sw_freelist under swapfs_lock.
 */
void
sw_putfree(struct async_reqs *arg)
{
	/* Release our hold - should have locked the page by now */
	VN_RELE(arg->a_vp);

	mutex_enter(&swapfs_lock);
	arg->a_next = sw_freelist;
	sw_freelist = arg;
	mutex_exit(&swapfs_lock);
}
351
/*
 * Running total of delta_swaps for memory delete operations that have gone
 * through swap_mem_config_pre_del() but not yet swap_mem_config_post_del().
 * Updated with atomic_add_long_nv().
 */
static pgcnt_t swapfs_pending_delete;
353
354 /*ARGSUSED*/
355 static void
356 swap_mem_config_post_add(
357 void *arg,
358 pgcnt_t delta_swaps)
359 {
360 (void) swapfs_recalc(physmem - swapfs_pending_delete);
361 }
362
363 /*ARGSUSED*/
364 static int
365 swap_mem_config_pre_del(
366 void *arg,
367 pgcnt_t delta_swaps)
368 {
369 pgcnt_t nv;
370
371 nv = atomic_add_long_nv(&swapfs_pending_delete, (spgcnt_t)delta_swaps);
372 if (!swapfs_recalc(physmem - nv)) {
373 /*
374 * Tidy-up is done by the call to post_del which
375 * is always made.
376 */
377 cmn_err(CE_NOTE, "Memory operation refused to ensure system "
378 "doesn't deadlock due to excessive consumption by swapfs.");
379 return (EBUSY);
380 }
381 return (0);
382 }
383
384 /*ARGSUSED*/
385 static void
386 swap_mem_config_post_del(
387 void *arg,
388 pgcnt_t delta_swaps,
389 int cancelled)
390 {
391 pgcnt_t nv;
392
393 nv = atomic_add_long_nv(&swapfs_pending_delete, -(spgcnt_t)delta_swaps);
394 (void) swapfs_recalc(physmem - nv);
395 }
396
/*
 * Callback vector registered by swap_init_mem_config() so that swapfs is
 * told about memory add and delete operations.
 */
static kphysm_setup_vector_t swap_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	swap_mem_config_post_add,
	swap_mem_config_pre_del,
	swap_mem_config_post_del,
};
403
404 static void
405 swap_init_mem_config(void)
406 {
407 int ret;
408
409 ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL);
410 ASSERT(ret == 0);
411 }