Print this page

        

*** 18,28 **** * * CDDL HEADER END */ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2016 Joyent, Inc. */ #include <sys/types.h> #include <sys/param.h> #include <sys/sysmacros.h> --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2015 Joyent, Inc. */ #include <sys/types.h> #include <sys/param.h> #include <sys/sysmacros.h>
*** 54,82 **** #include <sys/fs/tmpnode.h> static int tmpfsfstype; /* - * tmpfs_mountcount is used to prevent module unloads while there is still - * state from a former mount hanging around. With forced umount support, the - * filesystem module must not be allowed to go away before the last - * VFS_FREEVFS() call has been made. Since this is just an atomic counter, - * there's no need for locking. - */ - static uint32_t tmpfs_mountcount; - - /* * tmpfs vfs operations. */ static int tmpfsinit(int, char *); static int tmp_mount(struct vfs *, struct vnode *, struct mounta *, struct cred *); static int tmp_unmount(struct vfs *, int, struct cred *); static int tmp_root(struct vfs *, struct vnode **); static int tmp_statvfs(struct vfs *, struct statvfs64 *); static int tmp_vget(struct vfs *, struct vnode **, struct fid *); - static void tmp_freevfs(vfs_t *vfsp); /* * Loadable module wrapper */ #include <sys/modctl.h> --- 54,72 ----
*** 131,148 **** int _fini() { int error; - /* - * If a forceably unmounted instance is still hanging around, we cannot - * allow the module to be unloaded because that would cause panics once - * the VFS framework decides it's time to call into VFS_FREEVFS(). - */ - if (tmpfs_mountcount) - return (EBUSY); - error = mod_remove(&modlinkage); if (error) return (error); /* * Tear down the operations vectors --- 121,130 ----
*** 157,175 **** --- 139,167 ---- { return (mod_info(&modlinkage, modinfop)); } /* + * The following are patchable variables limiting the amount of system + * resources tmpfs can use. + * + * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory + * tmpfs can use for it's data structures (e.g. tmpnodes, directory entries) + * It is not determined by setting a hard limit but rather as a percentage of + * physical memory which is determined when tmpfs is first used in the system. + * * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for * the rest of the system. In other words, if the amount of free swap space * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs * anon allocations will fail. * * There is also a per mount limit on the amount of swap space * (tmount.tm_anonmax) settable via a mount option. */ + size_t tmpfs_maxkmem = 0; size_t tmpfs_minfree = 0; + size_t tmp_kmemspace; /* bytes of kernel heap used by all tmpfs */ static major_t tmpfs_major; static minor_t tmpfs_minor; static kmutex_t tmpfs_minor_lock;
*** 184,194 **** VFSNAME_MOUNT, { .vfs_mount = tmp_mount }, VFSNAME_UNMOUNT, { .vfs_unmount = tmp_unmount }, VFSNAME_ROOT, { .vfs_root = tmp_root }, VFSNAME_STATVFS, { .vfs_statvfs = tmp_statvfs }, VFSNAME_VGET, { .vfs_vget = tmp_vget }, - VFSNAME_FREEVFS, { .vfs_freevfs = tmp_freevfs }, NULL, NULL }; int error; extern void tmpfs_hash_init(); --- 176,185 ----
*** 219,234 **** * Set if not patched */ tmpfs_minfree = btopr(TMPMINFREE); } if ((tmpfs_major = getudev()) == (major_t)-1) { cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number."); tmpfs_major = 0; } mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); - tmpfs_mountcount = 0; return (0); } static int tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) --- 210,231 ---- * Set if not patched */ tmpfs_minfree = btopr(TMPMINFREE); } + /* + * The maximum amount of space tmpfs can allocate is + * TMPMAXPROCKMEM percent of kernel memory + */ + if (tmpfs_maxkmem == 0) + tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM); + if ((tmpfs_major = getudev()) == (major_t)-1) { cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number."); tmpfs_major = 0; } mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } static int tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
*** 235,245 **** { struct tmount *tm = NULL; struct tmpnode *tp; struct pathname dpn; int error; ! size_t anonmax; struct vattr rattr; int got_attrs; boolean_t mode_arg = B_FALSE; mode_t root_mode = 0777; char *argstr; --- 232,242 ---- { struct tmount *tm = NULL; struct tmpnode *tp; struct pathname dpn; int error; ! pgcnt_t anonmax; struct vattr rattr; int got_attrs; boolean_t mode_arg = B_FALSE; mode_t root_mode = 0777; char *argstr;
*** 279,289 **** */ if (vfs_optionisset(vfsp, "size", &argstr)) { if ((error = tmp_convnum(argstr, &anonmax)) != 0) goto out; } else { ! anonmax = SIZE_MAX; } /* * The "mode" mount argument allows the operator to override the * permissions of the root of the tmpfs mount. --- 276,286 ---- */ if (vfs_optionisset(vfsp, "size", &argstr)) { if ((error = tmp_convnum(argstr, &anonmax)) != 0) goto out; } else { ! anonmax = ULONG_MAX; } /* * The "mode" mount argument allows the operator to override the * permissions of the root of the tmpfs mount.
*** 312,323 **** tm->tm_anonmax = anonmax; mutex_exit(&tm->tm_contents); goto out; } ! if ((tm = kmem_zalloc(sizeof (struct tmount), ! KM_NOSLEEP | KM_NORMALPRI)) == NULL) { pn_free(&dpn); error = ENOMEM; goto out; } --- 309,319 ---- tm->tm_anonmax = anonmax; mutex_exit(&tm->tm_contents); goto out; } ! if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) { pn_free(&dpn); error = ENOMEM; goto out; }
*** 345,385 **** vfsp->vfs_fstype = tmpfsfstype; vfsp->vfs_dev = tm->tm_dev; vfsp->vfs_bsize = PAGESIZE; vfsp->vfs_flag |= VFS_NOTRUNC; vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype); ! tm->tm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP); (void) strcpy(tm->tm_mntpath, dpn.pn_path); /* - * Preemptively set vfs_zone before any of the tmp_kmem_* functions are - * called. That field is not populated until after a successful - * VFS_MOUNT when domount() sets vfsp metadata via vfs_add(). An - * accurate value is required for proper swap usage accounting. - */ - ASSERT0(uap->flags & MS_REMOUNT); - ASSERT(vfsp->vfs_zone == NULL); - vfsp->vfs_zone = curproc->p_zone; - - /* * allocate and initialize root tmpnode structure */ bzero(&rattr, sizeof (struct vattr)); rattr.va_mode = (mode_t)(S_IFDIR | root_mode); rattr.va_type = VDIR; rattr.va_rdev = 0; ! tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP); ! if (tp == NULL) { ! kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); ! mutex_destroy(&tm->tm_contents); ! mutex_destroy(&tm->tm_renamelck); ! kmem_free(tm, sizeof (struct tmount)); ! ! pn_free(&dpn); ! error = ENOMEM; ! goto out; ! } tmpnode_init(tm, tp, &rattr, cr); /* * Get the mode, uid, and gid from the underlying mount point. */ --- 341,361 ---- vfsp->vfs_fstype = tmpfsfstype; vfsp->vfs_dev = tm->tm_dev; vfsp->vfs_bsize = PAGESIZE; vfsp->vfs_flag |= VFS_NOTRUNC; vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype); ! tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE); (void) strcpy(tm->tm_mntpath, dpn.pn_path); /* * allocate and initialize root tmpnode structure */ bzero(&rattr, sizeof (struct vattr)); rattr.va_mode = (mode_t)(S_IFDIR | root_mode); rattr.va_type = VDIR; rattr.va_rdev = 0; ! tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE); tmpnode_init(tm, tp, &rattr, cr); /* * Get the mode, uid, and gid from the underlying mount point. */
*** 414,451 **** tp->tn_back = tp; tp->tn_forw = NULL; tp->tn_nlink = 0; tm->tm_rootnode = tp; ! if (tdirinit(tp, tp) != 0) { ! /* ! * While we would normally let our VOP_INACTIVE function take ! * care of cleaning up here, we're in a bit of a delicate ! * situation, so we do so manually. While it's tempting to try ! * and rely upon tmpfs_freevfs() and others, it's probably safer ! * for the time to do this manually at the cost of duplication. ! */ ! vn_invalid(TNTOV(tp)); ! rw_destroy(&tp->tn_rwlock); ! mutex_destroy(&tp->tn_tlock); ! vn_free(TNTOV(tp)); ! tmp_kmem_free(tm, tp, sizeof (struct tmpnode)); - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); - mutex_destroy(&tm->tm_contents); - mutex_destroy(&tm->tm_renamelck); - kmem_free(tm, sizeof (struct tmount)); - pn_free(&dpn); - error = ENOMEM; - goto out; - } - rw_exit(&tp->tn_rwlock); pn_free(&dpn); error = 0; - atomic_inc_32(&tmpfs_mountcount); out: if (error == 0) vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); --- 390,405 ---- tp->tn_back = tp; tp->tn_forw = NULL; tp->tn_nlink = 0; tm->tm_rootnode = tp; ! tdirinit(tp, tp); rw_exit(&tp->tn_rwlock); pn_free(&dpn); error = 0; out: if (error == 0) vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
*** 457,640 **** { struct tmount *tm = (struct tmount *)VFSTOTM(vfsp); struct tmpnode *tnp, *cancel; struct vnode *vp; int error; - uint_t cnt; - int i; if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0) return (error); mutex_enter(&tm->tm_contents); /* ! * In the normal unmount case (non-forced unmount), if there are no ! * open files, only the root node should have a reference count. ! * * With tm_contents held, nothing can be added or removed. * There may be some dirty pages. To prevent fsflush from * disrupting the unmount, put a hold on each node while scanning. * If we find a previously referenced node, undo the holds we have * placed and fail EBUSY. - * - * However, in the case of a forced umount, things are a bit different. - * An additional VFS_HOLD is added for each outstanding VN_HOLD to - * ensure that the file system is not cleaned up (tmp_freevfs) until - * the last vfs hold is dropped. This happens in tmp_inactive as the - * vnodes are released. Also, we can't add an additional VN_HOLD in - * this case since that would prevent tmp_inactive from ever being - * called. Finally, we do need to drop the zone ref now (zone_rele_ref) - * so that the zone is not blocked waiting for the final file system - * cleanup. */ tnp = tm->tm_rootnode; ! ! vp = TNTOV(tnp); ! mutex_enter(&vp->v_lock); ! cnt = vp->v_count; ! if (flag & MS_FORCE) { ! vfsp->vfs_flag |= VFS_UNMOUNTED; ! /* Extra hold which we rele below when we drop the zone ref */ ! VFS_HOLD(vfsp); ! ! for (i = 1; i < cnt; i++) ! VFS_HOLD(vfsp); ! ! /* drop the mutex now because no one can find this mount */ mutex_exit(&tm->tm_contents); - } else if (cnt > 1) { - mutex_exit(&vp->v_lock); - mutex_exit(&tm->tm_contents); return (EBUSY); } - mutex_exit(&vp->v_lock); - /* - * Check for open files. An open file causes everything to unwind - * unless this is a forced umount. - */ for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) { ! vp = TNTOV(tnp); ! mutex_enter(&vp->v_lock); ! cnt = vp->v_count; ! if (flag & MS_FORCE) { ! for (i = 0; i < cnt; i++) ! VFS_HOLD(vfsp); ! ! /* ! * In the case of a forced umount don't add an ! * additional VN_HOLD on the already held vnodes, like ! * we do in the non-forced unmount case. If the ! * cnt > 0, then the vnode already has at least one ! * hold and we need tmp_inactive to get called when the ! * last pre-existing hold on the node is released so ! * that we can VFS_RELE the VFS holds we just added. ! */ ! if (cnt == 0) { ! /* directly add VN_HOLD since have the lock */ ! vp->v_count++; ! } ! ! mutex_exit(&vp->v_lock); ! ! /* ! * If the tmpnode has any pages associated with it ! * (i.e. if it's a normal file with non-zero size), the ! * tmpnode could still be discovered by pageout or ! * fsflush via the page vnode pointers. To prevent this ! * from interfering with the tmp_freevfs, truncate the ! * tmpnode now. ! */ ! if (tnp->tn_size != 0 && tnp->tn_type == VREG) { ! rw_enter(&tnp->tn_rwlock, RW_WRITER); ! rw_enter(&tnp->tn_contents, RW_WRITER); ! ! (void) tmpnode_trunc(tm, tnp, 0); ! ! rw_exit(&tnp->tn_contents); ! rw_exit(&tnp->tn_rwlock); ! ! ASSERT(tnp->tn_size == 0); ! ASSERT(tnp->tn_nblocks == 0); ! } ! } else if (cnt > 0) { ! /* An open file; unwind the holds we've been adding. */ ! mutex_exit(&vp->v_lock); cancel = tm->tm_rootnode->tn_forw; while (cancel != tnp) { vp = TNTOV(cancel); ASSERT(vp->v_count > 0); VN_RELE(vp); cancel = cancel->tn_forw; } mutex_exit(&tm->tm_contents); return (EBUSY); - } else { - /* directly add a VN_HOLD since we have the lock */ - vp->v_count++; - mutex_exit(&vp->v_lock); } } - if (flag & MS_FORCE) { /* ! * Drop the zone ref now since we don't know how long it will ! * be until the final vfs_rele is called by tmp_inactive. */ - if (vfsp->vfs_zone) { - zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref, - ZONE_REF_VFS); - vfsp->vfs_zone = 0; - } - /* We can now drop the extra hold we added above. */ - VFS_RELE(vfsp); - } else { - /* - * For the non-forced case, we can drop the mutex now because - * no one can find this mount anymore - */ - vfsp->vfs_flag |= VFS_UNMOUNTED; mutex_exit(&tm->tm_contents); - } - return (0); - } - - /* - * Implementation of VFS_FREEVFS() to support forced umounts. This is called by - * the vfs framework after umount and the last VFS_RELE, to trigger the release - * of any resources still associated with the given vfs_t. We only add - * additional VFS_HOLDs during the forced umount case, so this is normally - * called immediately after tmp_umount. - */ - void - tmp_freevfs(vfs_t *vfsp) - { - struct tmount *tm = (struct tmount *)VFSTOTM(vfsp); - struct tmpnode *tnp; - struct vnode *vp; - /* * Free all kmemalloc'd and anonalloc'd memory associated with * this filesystem. To do this, we go through the file list twice, * once to remove all the directory entries, and then to remove * all the files. We do this because there is useful code in * tmpnode_free which assumes that the directory entry has been * removed before the file. */ - /* - * Now that we are tearing ourselves down we need to remove the - * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove - * files from the system causing us to have a negative value. Doing this - * seems a bit better than trying to set a flag on the tmount that says - * we're tearing down. - */ - vfsp->vfs_flag &= ~VFS_UNMOUNTED; - - /* * Remove all directory entries */ for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) { rw_enter(&tnp->tn_rwlock, RW_WRITER); if (tnp->tn_type == VDIR) --- 411,477 ---- { struct tmount *tm = (struct tmount *)VFSTOTM(vfsp); struct tmpnode *tnp, *cancel; struct vnode *vp; int error; if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0) return (error); + /* + * forced unmount is not supported by this file system + * and thus, ENOTSUP, is being returned. + */ + if (flag & MS_FORCE) + return (ENOTSUP); + mutex_enter(&tm->tm_contents); /* ! * If there are no open files, only the root node should have ! * a reference count. * With tm_contents held, nothing can be added or removed. * There may be some dirty pages. To prevent fsflush from * disrupting the unmount, put a hold on each node while scanning. * If we find a previously referenced node, undo the holds we have * placed and fail EBUSY. */ tnp = tm->tm_rootnode; ! if (TNTOV(tnp)->v_count > 1) { mutex_exit(&tm->tm_contents); return (EBUSY); } for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) { ! if ((vp = TNTOV(tnp))->v_count > 0) { cancel = tm->tm_rootnode->tn_forw; while (cancel != tnp) { vp = TNTOV(cancel); ASSERT(vp->v_count > 0); VN_RELE(vp); cancel = cancel->tn_forw; } mutex_exit(&tm->tm_contents); return (EBUSY); } + VN_HOLD(vp); } /* ! * We can drop the mutex now because no one can find this mount */ mutex_exit(&tm->tm_contents); /* * Free all kmemalloc'd and anonalloc'd memory associated with * this filesystem. To do this, we go through the file list twice, * once to remove all the directory entries, and then to remove * all the files. We do this because there is useful code in * tmpnode_free which assumes that the directory entry has been * removed before the file. */ /* * Remove all directory entries */ for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) { rw_enter(&tnp->tn_rwlock, RW_WRITER); if (tnp->tn_type == VDIR)
*** 696,715 **** tm->tm_rootnode->tn_xattrdp = NULL; VN_RELE(TNTOV(tm->tm_rootnode)); ASSERT(tm->tm_mntpath); ! kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); ASSERT(tm->tm_anonmem == 0); mutex_destroy(&tm->tm_contents); mutex_destroy(&tm->tm_renamelck); ! kmem_free(tm, sizeof (struct tmount)); ! /* Allow _fini() to succeed now */ ! atomic_dec_32(&tmpfs_mountcount); } /* * return root tmpnode for given vnode */ --- 533,551 ---- tm->tm_rootnode->tn_xattrdp = NULL; VN_RELE(TNTOV(tm->tm_rootnode)); ASSERT(tm->tm_mntpath); ! tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); ASSERT(tm->tm_anonmem == 0); mutex_destroy(&tm->tm_contents); mutex_destroy(&tm->tm_renamelck); ! tmp_memfree(tm, sizeof (struct tmount)); ! return (0); } /* * return root tmpnode for given vnode */
*** 767,789 **** /* * If tm_anonmax for this mount is less than the available swap space * (minus the amount tmpfs can't use), use that instead */ ! if (blocks > tmpfs_minfree && tm->tm_anonmax > tm->tm_anonmem) { sbp->f_bfree = MIN(blocks - tmpfs_minfree, ! btop(tm->tm_anonmax) - btopr(tm->tm_anonmem)); ! } else { sbp->f_bfree = 0; - } sbp->f_bavail = sbp->f_bfree; /* * Total number of blocks is what's available plus what's been used */ ! sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + btopr(tm->tm_anonmem)); if (eff_zid != GLOBAL_ZONEUNIQID && zp->zone_max_swap_ctl != UINT64_MAX) { /* * If the fs is used by a non-global zone with a swap cap, --- 603,624 ---- /* * If tm_anonmax for this mount is less than the available swap space * (minus the amount tmpfs can't use), use that instead */ ! if (blocks > tmpfs_minfree) sbp->f_bfree = MIN(blocks - tmpfs_minfree, ! tm->tm_anonmax - tm->tm_anonmem); ! else sbp->f_bfree = 0; sbp->f_bavail = sbp->f_bfree; /* * Total number of blocks is what's available plus what's been used */ ! sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem); if (eff_zid != GLOBAL_ZONEUNIQID && zp->zone_max_swap_ctl != UINT64_MAX) { /* * If the fs is used by a non-global zone with a swap cap,
*** 809,820 **** * The maximum number of files available is approximately the number * of tmpnodes we can allocate from the remaining kernel memory * available to tmpfs. This is fairly inaccurate since it doesn't * take into account the names stored in the directory entries. */ ! sbp->f_ffree = sbp->f_files = ptob(availrmem) / (sizeof (struct tmpnode) + sizeof (struct tdirent)); sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree); (void) cmpldev(&d32, vfsp->vfs_dev); sbp->f_fsid = d32; (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name); (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr)); --- 644,661 ---- * The maximum number of files available is approximately the number * of tmpnodes we can allocate from the remaining kernel memory * available to tmpfs. This is fairly inaccurate since it doesn't * take into account the names stored in the directory entries. */ ! if (tmpfs_maxkmem > tmp_kmemspace) ! sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) / (sizeof (struct tmpnode) + sizeof (struct tdirent)); + else + sbp->f_ffree = 0; + + sbp->f_files = tmpfs_maxkmem / + (sizeof (struct tmpnode) + sizeof (struct tdirent)); sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree); (void) cmpldev(&d32, vfsp->vfs_dev); sbp->f_fsid = d32; (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name); (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));