Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
+++ new/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2016 Joyent, Inc.
23 + * Copyright 2015 Joyent, Inc.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/param.h>
28 28 #include <sys/sysmacros.h>
29 29 #include <sys/kmem.h>
30 30 #include <sys/time.h>
31 31 #include <sys/pathname.h>
32 32 #include <sys/vfs.h>
33 33 #include <sys/vfs_opreg.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/stat.h>
36 36 #include <sys/uio.h>
37 37 #include <sys/stat.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/cmn_err.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/mount.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/systm.h>
45 45 #include <sys/mntent.h>
46 46 #include <fs/fs_subr.h>
47 47 #include <vm/page.h>
48 48 #include <vm/anon.h>
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
49 49 #include <sys/model.h>
50 50 #include <sys/policy.h>
51 51
52 52 #include <sys/fs/swapnode.h>
53 53 #include <sys/fs/tmp.h>
54 54 #include <sys/fs/tmpnode.h>
55 55
56 56 static int tmpfsfstype;
57 57
58 58 /*
59 - * tmpfs_mountcount is used to prevent module unloads while there is still
60 - * state from a former mount hanging around. With forced umount support, the
61 - * filesystem module must not be allowed to go away before the last
62 - * VFS_FREEVFS() call has been made. Since this is just an atomic counter,
63 - * there's no need for locking.
64 - */
65 -static uint32_t tmpfs_mountcount;
66 -
67 -/*
68 59 * tmpfs vfs operations.
69 60 */
70 61 static int tmpfsinit(int, char *);
71 62 static int tmp_mount(struct vfs *, struct vnode *,
72 63 struct mounta *, struct cred *);
73 64 static int tmp_unmount(struct vfs *, int, struct cred *);
74 65 static int tmp_root(struct vfs *, struct vnode **);
75 66 static int tmp_statvfs(struct vfs *, struct statvfs64 *);
76 67 static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
77 -static void tmp_freevfs(vfs_t *vfsp);
78 68
79 69 /*
80 70 * Loadable module wrapper
81 71 */
82 72 #include <sys/modctl.h>
83 73
84 74 static mntopts_t tmpfs_proto_opttbl;
85 75
86 76 static vfsdef_t vfw = {
87 77 VFSDEF_VERSION,
88 78 "tmpfs",
89 79 tmpfsinit,
90 80 VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
91 81 &tmpfs_proto_opttbl
92 82 };
93 83
94 84 /*
95 85 * in-kernel mnttab options
96 86 */
97 87 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
98 88 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
99 89
100 90 static mntopt_t tmpfs_options[] = {
101 91 /* Option name Cancel Opt Arg Flags Data */
102 92 { MNTOPT_XATTR, xattr_cancel, NULL, MO_DEFAULT, NULL},
103 93 { MNTOPT_NOXATTR, noxattr_cancel, NULL, NULL, NULL},
104 94 { "size", NULL, "0", MO_HASVALUE, NULL},
105 95 { "mode", NULL, NULL, MO_HASVALUE, NULL}
106 96 };
107 97
108 98
109 99 static mntopts_t tmpfs_proto_opttbl = {
110 100 sizeof (tmpfs_options) / sizeof (mntopt_t),
111 101 tmpfs_options
112 102 };
113 103
114 104 /*
115 105 * Module linkage information
116 106 */
117 107 static struct modlfs modlfs = {
118 108 &mod_fsops, "filesystem for tmpfs", &vfw
119 109 };
120 110
121 111 static struct modlinkage modlinkage = {
122 112 MODREV_1, &modlfs, NULL
123 113 };
124 114
125 115 int
|
↓ open down ↓ |
38 lines elided |
↑ open up ↑ |
126 116 _init()
127 117 {
128 118 return (mod_install(&modlinkage));
129 119 }
130 120
131 121 int
132 122 _fini()
133 123 {
134 124 int error;
135 125
136 - /*
137 - * If a forceably unmounted instance is still hanging around, we cannot
138 - * allow the module to be unloaded because that would cause panics once
139 - * the VFS framework decides it's time to call into VFS_FREEVFS().
140 - */
141 - if (tmpfs_mountcount)
142 - return (EBUSY);
143 -
144 126 error = mod_remove(&modlinkage);
145 127 if (error)
146 128 return (error);
147 129 /*
148 130 * Tear down the operations vectors
149 131 */
150 132 (void) vfs_freevfsops_by_type(tmpfsfstype);
151 133 vn_freevnodeops(tmp_vnodeops);
152 134 return (0);
153 135 }
154 136
155 137 int
156 138 _info(struct modinfo *modinfop)
157 139 {
158 140 return (mod_info(&modlinkage, modinfop));
159 141 }
160 142
161 143 /*
144 + * The following are patchable variables limiting the amount of system
145 + * resources tmpfs can use.
146 + *
147 + * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
 148 + * tmpfs can use for its data structures (e.g. tmpnodes, directory entries)
149 + * It is not determined by setting a hard limit but rather as a percentage of
150 + * physical memory which is determined when tmpfs is first used in the system.
151 + *
162 152 * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
163 153 * the rest of the system. In other words, if the amount of free swap space
164 154 * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
165 155 * anon allocations will fail.
166 156 *
167 157 * There is also a per mount limit on the amount of swap space
168 158 * (tmount.tm_anonmax) settable via a mount option.
169 159 */
160 +size_t tmpfs_maxkmem = 0;
170 161 size_t tmpfs_minfree = 0;
162 +size_t tmp_kmemspace; /* bytes of kernel heap used by all tmpfs */
171 163
172 164 static major_t tmpfs_major;
173 165 static minor_t tmpfs_minor;
174 166 static kmutex_t tmpfs_minor_lock;
175 167
176 168 /*
177 169 * initialize global tmpfs locks and such
178 170 * called when loading tmpfs module
179 171 */
180 172 static int
181 173 tmpfsinit(int fstype, char *name)
182 174 {
183 175 static const fs_operation_def_t tmp_vfsops_template[] = {
184 176 VFSNAME_MOUNT, { .vfs_mount = tmp_mount },
185 177 VFSNAME_UNMOUNT, { .vfs_unmount = tmp_unmount },
186 178 VFSNAME_ROOT, { .vfs_root = tmp_root },
187 179 VFSNAME_STATVFS, { .vfs_statvfs = tmp_statvfs },
188 180 VFSNAME_VGET, { .vfs_vget = tmp_vget },
189 - VFSNAME_FREEVFS, { .vfs_freevfs = tmp_freevfs },
190 181 NULL, NULL
191 182 };
192 183 int error;
193 184 extern void tmpfs_hash_init();
194 185
195 186 tmpfs_hash_init();
196 187 tmpfsfstype = fstype;
197 188 ASSERT(tmpfsfstype != 0);
198 189
199 190 error = vfs_setfsops(fstype, tmp_vfsops_template, NULL);
200 191 if (error != 0) {
201 192 cmn_err(CE_WARN, "tmpfsinit: bad vfs ops template");
202 193 return (error);
203 194 }
204 195
205 196 error = vn_make_ops(name, tmp_vnodeops_template, &tmp_vnodeops);
206 197 if (error != 0) {
207 198 (void) vfs_freevfsops_by_type(fstype);
208 199 cmn_err(CE_WARN, "tmpfsinit: bad vnode ops template");
209 200 return (error);
210 201 }
211 202
212 203 /*
213 204 * tmpfs_minfree doesn't need to be some function of configured
|
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
214 205 * swap space since it really is an absolute limit of swap space
215 206 * which still allows other processes to execute.
216 207 */
217 208 if (tmpfs_minfree == 0) {
218 209 /*
219 210 * Set if not patched
220 211 */
221 212 tmpfs_minfree = btopr(TMPMINFREE);
222 213 }
223 214
215 + /*
 216 + * The maximum amount of kernel memory tmpfs can allocate is
 217 + * 1/TMPMAXFRACKMEM of the memory available to the kernel
218 + */
219 + if (tmpfs_maxkmem == 0)
220 + tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
221 +
224 222 if ((tmpfs_major = getudev()) == (major_t)-1) {
225 223 cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
226 224 tmpfs_major = 0;
227 225 }
228 226 mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
229 - tmpfs_mountcount = 0;
230 227 return (0);
231 228 }
232 229
233 230 static int
234 231 tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
235 232 {
236 233 struct tmount *tm = NULL;
237 234 struct tmpnode *tp;
238 235 struct pathname dpn;
239 236 int error;
240 - size_t anonmax;
237 + pgcnt_t anonmax;
241 238 struct vattr rattr;
242 239 int got_attrs;
243 240 boolean_t mode_arg = B_FALSE;
244 241 mode_t root_mode = 0777;
245 242 char *argstr;
246 243
247 244 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
248 245 return (error);
249 246
250 247 if (mvp->v_type != VDIR)
251 248 return (ENOTDIR);
252 249
253 250 mutex_enter(&mvp->v_lock);
254 251 if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
255 252 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
256 253 mutex_exit(&mvp->v_lock);
257 254 return (EBUSY);
258 255 }
259 256 mutex_exit(&mvp->v_lock);
260 257
261 258 /*
262 259 * Having the resource be anything but "swap" doesn't make sense.
263 260 */
264 261 vfs_setresource(vfsp, "swap", 0);
265 262
266 263 /*
267 264 * now look for options we understand...
268 265 */
269 266
270 267 /* tmpfs doesn't support read-only mounts */
271 268 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
272 269 error = EINVAL;
273 270 goto out;
|
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
274 271 }
275 272
276 273 /*
277 274 * tm_anonmax is set according to the mount arguments
278 275 * if any. Otherwise, it is set to a maximum value.
279 276 */
280 277 if (vfs_optionisset(vfsp, "size", &argstr)) {
281 278 if ((error = tmp_convnum(argstr, &anonmax)) != 0)
282 279 goto out;
283 280 } else {
284 - anonmax = SIZE_MAX;
281 + anonmax = ULONG_MAX;
285 282 }
286 283
287 284 /*
288 285 * The "mode" mount argument allows the operator to override the
289 286 * permissions of the root of the tmpfs mount.
290 287 */
291 288 if (vfs_optionisset(vfsp, "mode", &argstr)) {
292 289 if ((error = tmp_convmode(argstr, &root_mode)) != 0) {
293 290 goto out;
294 291 }
295 292 mode_arg = B_TRUE;
296 293 }
297 294
298 295 if (error = pn_get(uap->dir,
299 296 (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
300 297 goto out;
301 298
302 299 if (uap->flags & MS_REMOUNT) {
303 300 tm = (struct tmount *)VFSTOTM(vfsp);
304 301
305 302 /*
306 303 * If we change the size so its less than what is currently
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
307 304 * being used, we allow that. The file system will simply be
308 305 * full until enough files have been removed to get below the
309 306 * new max.
310 307 */
311 308 mutex_enter(&tm->tm_contents);
312 309 tm->tm_anonmax = anonmax;
313 310 mutex_exit(&tm->tm_contents);
314 311 goto out;
315 312 }
316 313
317 - if ((tm = kmem_zalloc(sizeof (struct tmount),
318 - KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
314 + if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
319 315 pn_free(&dpn);
320 316 error = ENOMEM;
321 317 goto out;
322 318 }
323 319
324 320 /*
325 321 * find an available minor device number for this mount
326 322 */
327 323 mutex_enter(&tmpfs_minor_lock);
328 324 do {
329 325 tmpfs_minor = (tmpfs_minor + 1) & L_MAXMIN32;
330 326 tm->tm_dev = makedevice(tmpfs_major, tmpfs_minor);
331 327 } while (vfs_devismounted(tm->tm_dev));
332 328 mutex_exit(&tmpfs_minor_lock);
333 329
334 330 /*
335 331 * Set but don't bother entering the mutex
336 332 * (tmount not on mount list yet)
337 333 */
338 334 mutex_init(&tm->tm_contents, NULL, MUTEX_DEFAULT, NULL);
339 335 mutex_init(&tm->tm_renamelck, NULL, MUTEX_DEFAULT, NULL);
|
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
340 336
341 337 tm->tm_vfsp = vfsp;
342 338 tm->tm_anonmax = anonmax;
343 339
344 340 vfsp->vfs_data = (caddr_t)tm;
345 341 vfsp->vfs_fstype = tmpfsfstype;
346 342 vfsp->vfs_dev = tm->tm_dev;
347 343 vfsp->vfs_bsize = PAGESIZE;
348 344 vfsp->vfs_flag |= VFS_NOTRUNC;
349 345 vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
350 - tm->tm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
346 + tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
351 347 (void) strcpy(tm->tm_mntpath, dpn.pn_path);
352 348
353 349 /*
354 - * Preemptively set vfs_zone before any of the tmp_kmem_* functions are
355 - * called. That field is not populated until after a successful
356 - * VFS_MOUNT when domount() sets vfsp metadata via vfs_add(). An
357 - * accurate value is required for proper swap usage accounting.
358 - */
359 - ASSERT0(uap->flags & MS_REMOUNT);
360 - ASSERT(vfsp->vfs_zone == NULL);
361 - vfsp->vfs_zone = curproc->p_zone;
362 -
363 - /*
364 350 * allocate and initialize root tmpnode structure
365 351 */
366 352 bzero(&rattr, sizeof (struct vattr));
367 353 rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
368 354 rattr.va_type = VDIR;
369 355 rattr.va_rdev = 0;
370 - tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP);
371 - if (tp == NULL) {
372 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
373 - mutex_destroy(&tm->tm_contents);
374 - mutex_destroy(&tm->tm_renamelck);
375 - kmem_free(tm, sizeof (struct tmount));
376 -
377 - pn_free(&dpn);
378 - error = ENOMEM;
379 - goto out;
380 - }
356 + tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
381 357 tmpnode_init(tm, tp, &rattr, cr);
382 358
383 359 /*
384 360 * Get the mode, uid, and gid from the underlying mount point.
385 361 */
386 362 rattr.va_mask = AT_MODE|AT_UID|AT_GID; /* Hint to getattr */
387 363 got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
388 364
389 365 rw_enter(&tp->tn_rwlock, RW_WRITER);
390 366 TNTOV(tp)->v_flag |= VROOT;
391 367
392 368 /*
393 369 * If the getattr succeeded, use its results. Otherwise allow
394 370 * the previously set hardwired defaults to prevail.
395 371 */
396 372 if (got_attrs == 0) {
397 373 if (!mode_arg) {
398 374 /*
399 375 * Only use the underlying mount point for the
400 376 * mode if the "mode" mount argument was not
401 377 * provided.
402 378 */
403 379 tp->tn_mode = rattr.va_mode;
404 380 }
405 381 tp->tn_uid = rattr.va_uid;
406 382 tp->tn_gid = rattr.va_gid;
407 383 }
408 384
|
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
409 385 /*
410 386 * initialize linked list of tmpnodes so that the back pointer of
411 387 * the root tmpnode always points to the last one on the list
412 388 * and the forward pointer of the last node is null
413 389 */
414 390 tp->tn_back = tp;
415 391 tp->tn_forw = NULL;
416 392 tp->tn_nlink = 0;
417 393 tm->tm_rootnode = tp;
418 394
419 - if (tdirinit(tp, tp) != 0) {
420 - /*
421 - * While we would normally let our VOP_INACTIVE function take
422 - * care of cleaning up here, we're in a bit of a delicate
423 - * situation, so we do so manually. While it's tempting to try
424 - * and rely upon tmpfs_freevfs() and others, it's probably safer
425 - * for the time to do this manually at the cost of duplication.
426 - */
427 - vn_invalid(TNTOV(tp));
428 - rw_destroy(&tp->tn_rwlock);
429 - mutex_destroy(&tp->tn_tlock);
430 - vn_free(TNTOV(tp));
431 - tmp_kmem_free(tm, tp, sizeof (struct tmpnode));
395 + tdirinit(tp, tp);
432 396
433 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
434 - mutex_destroy(&tm->tm_contents);
435 - mutex_destroy(&tm->tm_renamelck);
436 - kmem_free(tm, sizeof (struct tmount));
437 - pn_free(&dpn);
438 - error = ENOMEM;
439 - goto out;
440 - }
441 -
442 397 rw_exit(&tp->tn_rwlock);
443 398
444 399 pn_free(&dpn);
445 400 error = 0;
446 - atomic_inc_32(&tmpfs_mountcount);
447 401
448 402 out:
449 403 if (error == 0)
450 404 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
451 405
452 406 return (error);
453 407 }
454 408
455 409 static int
456 410 tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
457 411 {
458 412 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
459 413 struct tmpnode *tnp, *cancel;
460 414 struct vnode *vp;
461 415 int error;
462 - uint_t cnt;
463 - int i;
464 416
465 417 if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
466 418 return (error);
467 419
420 + /*
421 + * forced unmount is not supported by this file system
422 + * and thus, ENOTSUP, is being returned.
423 + */
424 + if (flag & MS_FORCE)
425 + return (ENOTSUP);
426 +
468 427 mutex_enter(&tm->tm_contents);
469 428
470 429 /*
471 - * In the normal unmount case (non-forced unmount), if there are no
472 - * open files, only the root node should have a reference count.
473 - *
430 + * If there are no open files, only the root node should have
431 + * a reference count.
474 432 * With tm_contents held, nothing can be added or removed.
475 433 * There may be some dirty pages. To prevent fsflush from
476 434 * disrupting the unmount, put a hold on each node while scanning.
477 435 * If we find a previously referenced node, undo the holds we have
478 436 * placed and fail EBUSY.
479 - *
480 - * However, in the case of a forced umount, things are a bit different.
481 - * An additional VFS_HOLD is added for each outstanding VN_HOLD to
482 - * ensure that the file system is not cleaned up (tmp_freevfs) until
483 - * the last vfs hold is dropped. This happens in tmp_inactive as the
484 - * vnodes are released. Also, we can't add an additional VN_HOLD in
485 - * this case since that would prevent tmp_inactive from ever being
486 - * called. Finally, we do need to drop the zone ref now (zone_rele_ref)
487 - * so that the zone is not blocked waiting for the final file system
488 - * cleanup.
489 437 */
490 438 tnp = tm->tm_rootnode;
491 -
492 - vp = TNTOV(tnp);
493 - mutex_enter(&vp->v_lock);
494 - cnt = vp->v_count;
495 - if (flag & MS_FORCE) {
496 - vfsp->vfs_flag |= VFS_UNMOUNTED;
497 - /* Extra hold which we rele below when we drop the zone ref */
498 - VFS_HOLD(vfsp);
499 -
500 - for (i = 1; i < cnt; i++)
501 - VFS_HOLD(vfsp);
502 -
503 - /* drop the mutex now because no one can find this mount */
439 + if (TNTOV(tnp)->v_count > 1) {
504 440 mutex_exit(&tm->tm_contents);
505 - } else if (cnt > 1) {
506 - mutex_exit(&vp->v_lock);
507 - mutex_exit(&tm->tm_contents);
508 441 return (EBUSY);
509 442 }
510 - mutex_exit(&vp->v_lock);
511 443
512 - /*
513 - * Check for open files. An open file causes everything to unwind
514 - * unless this is a forced umount.
515 - */
516 444 for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
517 - vp = TNTOV(tnp);
518 - mutex_enter(&vp->v_lock);
519 - cnt = vp->v_count;
520 - if (flag & MS_FORCE) {
521 - for (i = 0; i < cnt; i++)
522 - VFS_HOLD(vfsp);
523 -
524 - /*
525 - * In the case of a forced umount don't add an
526 - * additional VN_HOLD on the already held vnodes, like
527 - * we do in the non-forced unmount case. If the
528 - * cnt > 0, then the vnode already has at least one
529 - * hold and we need tmp_inactive to get called when the
530 - * last pre-existing hold on the node is released so
531 - * that we can VFS_RELE the VFS holds we just added.
532 - */
533 - if (cnt == 0) {
534 - /* directly add VN_HOLD since have the lock */
535 - vp->v_count++;
536 - }
537 -
538 - mutex_exit(&vp->v_lock);
539 -
540 - /*
541 - * If the tmpnode has any pages associated with it
542 - * (i.e. if it's a normal file with non-zero size), the
543 - * tmpnode could still be discovered by pageout or
544 - * fsflush via the page vnode pointers. To prevent this
545 - * from interfering with the tmp_freevfs, truncate the
546 - * tmpnode now.
547 - */
548 - if (tnp->tn_size != 0 && tnp->tn_type == VREG) {
549 - rw_enter(&tnp->tn_rwlock, RW_WRITER);
550 - rw_enter(&tnp->tn_contents, RW_WRITER);
551 -
552 - (void) tmpnode_trunc(tm, tnp, 0);
553 -
554 - rw_exit(&tnp->tn_contents);
555 - rw_exit(&tnp->tn_rwlock);
556 -
557 - ASSERT(tnp->tn_size == 0);
558 - ASSERT(tnp->tn_nblocks == 0);
559 - }
560 - } else if (cnt > 0) {
561 - /* An open file; unwind the holds we've been adding. */
562 - mutex_exit(&vp->v_lock);
445 + if ((vp = TNTOV(tnp))->v_count > 0) {
563 446 cancel = tm->tm_rootnode->tn_forw;
564 447 while (cancel != tnp) {
565 448 vp = TNTOV(cancel);
566 449 ASSERT(vp->v_count > 0);
567 450 VN_RELE(vp);
568 451 cancel = cancel->tn_forw;
569 452 }
570 453 mutex_exit(&tm->tm_contents);
571 454 return (EBUSY);
572 - } else {
573 - /* directly add a VN_HOLD since we have the lock */
574 - vp->v_count++;
575 - mutex_exit(&vp->v_lock);
576 455 }
456 + VN_HOLD(vp);
577 457 }
578 458
579 - if (flag & MS_FORCE) {
580 - /*
581 - * Drop the zone ref now since we don't know how long it will
582 - * be until the final vfs_rele is called by tmp_inactive.
583 - */
584 - if (vfsp->vfs_zone) {
585 - zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
586 - ZONE_REF_VFS);
587 - vfsp->vfs_zone = 0;
588 - }
589 - /* We can now drop the extra hold we added above. */
590 - VFS_RELE(vfsp);
591 - } else {
592 - /*
593 - * For the non-forced case, we can drop the mutex now because
594 - * no one can find this mount anymore
595 - */
596 - vfsp->vfs_flag |= VFS_UNMOUNTED;
597 - mutex_exit(&tm->tm_contents);
598 - }
459 + /*
460 + * We can drop the mutex now because no one can find this mount
461 + */
462 + mutex_exit(&tm->tm_contents);
599 463
600 - return (0);
601 -}
602 -
603 -/*
604 - * Implementation of VFS_FREEVFS() to support forced umounts. This is called by
605 - * the vfs framework after umount and the last VFS_RELE, to trigger the release
606 - * of any resources still associated with the given vfs_t. We only add
607 - * additional VFS_HOLDs during the forced umount case, so this is normally
608 - * called immediately after tmp_umount.
609 - */
610 -void
611 -tmp_freevfs(vfs_t *vfsp)
612 -{
613 - struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
614 - struct tmpnode *tnp;
615 - struct vnode *vp;
616 -
617 464 /*
618 465 * Free all kmemalloc'd and anonalloc'd memory associated with
619 466 * this filesystem. To do this, we go through the file list twice,
620 467 * once to remove all the directory entries, and then to remove
621 468 * all the files. We do this because there is useful code in
622 469 * tmpnode_free which assumes that the directory entry has been
623 470 * removed before the file.
624 471 */
625 -
626 472 /*
627 - * Now that we are tearing ourselves down we need to remove the
628 - * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
629 - * files from the system causing us to have a negative value. Doing this
630 - * seems a bit better than trying to set a flag on the tmount that says
631 - * we're tearing down.
632 - */
633 - vfsp->vfs_flag &= ~VFS_UNMOUNTED;
634 -
635 - /*
636 473 * Remove all directory entries
637 474 */
638 475 for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) {
639 476 rw_enter(&tnp->tn_rwlock, RW_WRITER);
640 477 if (tnp->tn_type == VDIR)
641 478 tdirtrunc(tnp);
642 479 if (tnp->tn_vnode->v_flag & V_XATTRDIR) {
643 480 /*
644 481 * Account for implicit attrdir reference.
645 482 */
646 483 ASSERT(tnp->tn_nlink > 0);
647 484 DECR_COUNT(&tnp->tn_nlink, &tnp->tn_tlock);
648 485 }
649 486 rw_exit(&tnp->tn_rwlock);
650 487 }
651 488
652 489 ASSERT(tm->tm_rootnode);
653 490
654 491 /*
655 492 * All links are gone, v_count is keeping nodes in place.
656 493 * VN_RELE should make the node disappear, unless somebody
657 494 * is holding pages against it. Nap and retry until it disappears.
658 495 *
659 496 * We re-acquire the lock to prevent others who have a HOLD on
660 497 * a tmpnode via its pages or anon slots from blowing it away
661 498 * (in tmp_inactive) while we're trying to get to it here. Once
662 499 * we have a HOLD on it we know it'll stick around.
663 500 *
664 501 */
665 502 mutex_enter(&tm->tm_contents);
666 503 /*
667 504 * Remove all the files (except the rootnode) backwards.
668 505 */
669 506 while ((tnp = tm->tm_rootnode->tn_back) != tm->tm_rootnode) {
670 507 mutex_exit(&tm->tm_contents);
671 508 /*
672 509 * Inhibit tmp_inactive from touching attribute directory
673 510 * as all nodes will be released here.
674 511 * Note we handled the link count in pass 2 above.
675 512 */
676 513 rw_enter(&tnp->tn_rwlock, RW_WRITER);
677 514 tnp->tn_xattrdp = NULL;
678 515 rw_exit(&tnp->tn_rwlock);
679 516 vp = TNTOV(tnp);
680 517 VN_RELE(vp);
681 518 mutex_enter(&tm->tm_contents);
682 519 /*
683 520 * It's still there after the RELE. Someone else like pageout
684 521 * has a hold on it so wait a bit and then try again - we know
685 522 * they'll give it up soon.
686 523 */
687 524 if (tnp == tm->tm_rootnode->tn_back) {
688 525 VN_HOLD(vp);
689 526 mutex_exit(&tm->tm_contents);
690 527 delay(hz / 4);
|
↓ open down ↓ |
45 lines elided |
↑ open up ↑ |
691 528 mutex_enter(&tm->tm_contents);
692 529 }
693 530 }
694 531 mutex_exit(&tm->tm_contents);
695 532
696 533 tm->tm_rootnode->tn_xattrdp = NULL;
697 534 VN_RELE(TNTOV(tm->tm_rootnode));
698 535
699 536 ASSERT(tm->tm_mntpath);
700 537
701 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
538 + tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
702 539
703 540 ASSERT(tm->tm_anonmem == 0);
704 541
705 542 mutex_destroy(&tm->tm_contents);
706 543 mutex_destroy(&tm->tm_renamelck);
707 - kmem_free(tm, sizeof (struct tmount));
544 + tmp_memfree(tm, sizeof (struct tmount));
708 545
709 - /* Allow _fini() to succeed now */
710 - atomic_dec_32(&tmpfs_mountcount);
546 + return (0);
711 547 }
712 548
713 549 /*
714 550 * return root tmpnode for given vnode
715 551 */
716 552 static int
717 553 tmp_root(struct vfs *vfsp, struct vnode **vpp)
718 554 {
719 555 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
720 556 struct tmpnode *tp = tm->tm_rootnode;
721 557 struct vnode *vp;
722 558
723 559 ASSERT(tp);
724 560
725 561 vp = TNTOV(tp);
726 562 VN_HOLD(vp);
727 563 *vpp = vp;
728 564 return (0);
729 565 }
730 566
731 567 static int
732 568 tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
733 569 {
734 570 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
735 571 ulong_t blocks;
736 572 dev32_t d32;
737 573 zoneid_t eff_zid;
738 574 struct zone *zp;
739 575
740 576 /*
741 577 * The file system may have been mounted by the global zone on
742 578 * behalf of the non-global zone. In that case, the tmount zone_id
743 579 * will be the global zone. We still want to show the swap cap inside
744 580 * the zone in this case, even though the file system was mounted by
745 581 * the global zone.
746 582 */
747 583 if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
748 584 zp = curproc->p_zone;
749 585 else
750 586 zp = tm->tm_vfsp->vfs_zone;
751 587
752 588 if (zp == NULL)
753 589 eff_zid = GLOBAL_ZONEUNIQID;
754 590 else
755 591 eff_zid = zp->zone_id;
756 592
757 593 sbp->f_bsize = PAGESIZE;
758 594 sbp->f_frsize = PAGESIZE;
759 595
760 596 /*
761 597 * Find the amount of available physical and memory swap
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
762 598 */
763 599 mutex_enter(&anoninfo_lock);
764 600 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
765 601 blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
766 602 mutex_exit(&anoninfo_lock);
767 603
768 604 /*
769 605 * If tm_anonmax for this mount is less than the available swap space
770 606 * (minus the amount tmpfs can't use), use that instead
771 607 */
772 - if (blocks > tmpfs_minfree && tm->tm_anonmax > tm->tm_anonmem) {
608 + if (blocks > tmpfs_minfree)
773 609 sbp->f_bfree = MIN(blocks - tmpfs_minfree,
774 - btop(tm->tm_anonmax) - btopr(tm->tm_anonmem));
775 - } else {
610 + tm->tm_anonmax - tm->tm_anonmem);
611 + else
776 612 sbp->f_bfree = 0;
777 - }
778 613
779 614 sbp->f_bavail = sbp->f_bfree;
780 615
781 616 /*
782 617 * Total number of blocks is what's available plus what's been used
783 618 */
784 - sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + btopr(tm->tm_anonmem));
619 + sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem);
785 620
786 621 if (eff_zid != GLOBAL_ZONEUNIQID &&
787 622 zp->zone_max_swap_ctl != UINT64_MAX) {
788 623 /*
789 624 * If the fs is used by a non-global zone with a swap cap,
790 625 * then report the capped size.
791 626 */
792 627 rctl_qty_t cap, used;
793 628 pgcnt_t pgcap, pgused;
794 629
795 630 mutex_enter(&zp->zone_mem_lock);
796 631 cap = zp->zone_max_swap_ctl;
797 632 used = zp->zone_max_swap;
798 633 mutex_exit(&zp->zone_mem_lock);
799 634
800 635 pgcap = btop(cap);
801 636 pgused = btop(used);
802 637
803 638 sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
|
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
804 639 sbp->f_bavail = sbp->f_bfree;
805 640 sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
806 641 }
807 642
808 643 /*
809 644 * The maximum number of files available is approximately the number
810 645 * of tmpnodes we can allocate from the remaining kernel memory
811 646 * available to tmpfs. This is fairly inaccurate since it doesn't
812 647 * take into account the names stored in the directory entries.
813 648 */
814 - sbp->f_ffree = sbp->f_files = ptob(availrmem) /
649 + if (tmpfs_maxkmem > tmp_kmemspace)
650 + sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
651 + (sizeof (struct tmpnode) + sizeof (struct tdirent));
652 + else
653 + sbp->f_ffree = 0;
654 +
655 + sbp->f_files = tmpfs_maxkmem /
815 656 (sizeof (struct tmpnode) + sizeof (struct tdirent));
816 657 sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
817 658 (void) cmpldev(&d32, vfsp->vfs_dev);
818 659 sbp->f_fsid = d32;
819 660 (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name);
820 661 (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));
821 662 /*
822 663 * ensure null termination
823 664 */
824 665 sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
825 666 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
826 667 sbp->f_namemax = MAXNAMELEN - 1;
827 668 return (0);
828 669 }
829 670
830 671 static int
831 672 tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
832 673 {
833 674 struct tfid *tfid;
834 675 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
835 676 struct tmpnode *tp = NULL;
836 677
837 678 tfid = (struct tfid *)fidp;
838 679 *vpp = NULL;
839 680
840 681 mutex_enter(&tm->tm_contents);
841 682 for (tp = tm->tm_rootnode; tp; tp = tp->tn_forw) {
842 683 mutex_enter(&tp->tn_tlock);
843 684 if (tp->tn_nodeid == tfid->tfid_ino) {
844 685 /*
845 686 * If the gen numbers don't match we know the
846 687 * file won't be found since only one tmpnode
847 688 * can have this number at a time.
848 689 */
849 690 if (tp->tn_gen != tfid->tfid_gen || tp->tn_nlink == 0) {
850 691 mutex_exit(&tp->tn_tlock);
851 692 mutex_exit(&tm->tm_contents);
852 693 return (0);
853 694 }
854 695 *vpp = (struct vnode *)TNTOV(tp);
855 696
856 697 VN_HOLD(*vpp);
857 698
858 699 if ((tp->tn_mode & S_ISVTX) &&
859 700 !(tp->tn_mode & (S_IXUSR | S_IFDIR))) {
860 701 mutex_enter(&(*vpp)->v_lock);
861 702 (*vpp)->v_flag |= VISSWAP;
862 703 mutex_exit(&(*vpp)->v_lock);
863 704 }
864 705 mutex_exit(&tp->tn_tlock);
865 706 mutex_exit(&tm->tm_contents);
866 707 return (0);
867 708 }
868 709 mutex_exit(&tp->tn_tlock);
869 710 }
870 711 mutex_exit(&tm->tm_contents);
871 712 return (0);
872 713 }
|
↓ open down ↓ |
48 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX