Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
+++ new/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2016 Joyent, Inc.
23 + * Copyright 2015 Joyent, Inc.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/param.h>
28 28 #include <sys/sysmacros.h>
29 29 #include <sys/kmem.h>
30 30 #include <sys/time.h>
31 31 #include <sys/pathname.h>
32 32 #include <sys/vfs.h>
33 33 #include <sys/vfs_opreg.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/stat.h>
36 36 #include <sys/uio.h>
37 37 #include <sys/stat.h>
38 38 #include <sys/errno.h>
39 39 #include <sys/cmn_err.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/mount.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/systm.h>
45 45 #include <sys/mntent.h>
46 46 #include <fs/fs_subr.h>
47 47 #include <vm/page.h>
48 48 #include <vm/anon.h>
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
49 49 #include <sys/model.h>
50 50 #include <sys/policy.h>
51 51
52 52 #include <sys/fs/swapnode.h>
53 53 #include <sys/fs/tmp.h>
54 54 #include <sys/fs/tmpnode.h>
55 55
56 56 static int tmpfsfstype;
57 57
58 58 /*
59 - * tmpfs_mountcount is used to prevent module unloads while there is still
60 - * state from a former mount hanging around. With forced umount support, the
61 - * filesystem module must not be allowed to go away before the last
62 - * VFS_FREEVFS() call has been made. Since this is just an atomic counter,
63 - * there's no need for locking.
64 - */
65 -static uint32_t tmpfs_mountcount;
66 -
67 -/*
68 59 * tmpfs vfs operations.
69 60 */
70 61 static int tmpfsinit(int, char *);
71 62 static int tmp_mount(struct vfs *, struct vnode *,
72 63 struct mounta *, struct cred *);
73 64 static int tmp_unmount(struct vfs *, int, struct cred *);
74 65 static int tmp_root(struct vfs *, struct vnode **);
75 66 static int tmp_statvfs(struct vfs *, struct statvfs64 *);
76 67 static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
77 -static void tmp_freevfs(vfs_t *vfsp);
78 68
79 69 /*
80 70 * Loadable module wrapper
81 71 */
82 72 #include <sys/modctl.h>
83 73
84 74 static mntopts_t tmpfs_proto_opttbl;
85 75
86 76 static vfsdef_t vfw = {
87 77 VFSDEF_VERSION,
88 78 "tmpfs",
89 79 tmpfsinit,
90 80 VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
91 81 &tmpfs_proto_opttbl
92 82 };
93 83
94 84 /*
95 85 * in-kernel mnttab options
96 86 */
97 87 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
98 88 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
99 89
100 90 static mntopt_t tmpfs_options[] = {
101 91 /* Option name Cancel Opt Arg Flags Data */
102 92 { MNTOPT_XATTR, xattr_cancel, NULL, MO_DEFAULT, NULL},
103 93 { MNTOPT_NOXATTR, noxattr_cancel, NULL, NULL, NULL},
104 94 { "size", NULL, "0", MO_HASVALUE, NULL},
105 95 { "mode", NULL, NULL, MO_HASVALUE, NULL}
106 96 };
107 97
108 98
109 99 static mntopts_t tmpfs_proto_opttbl = {
110 100 sizeof (tmpfs_options) / sizeof (mntopt_t),
111 101 tmpfs_options
112 102 };
113 103
114 104 /*
115 105 * Module linkage information
116 106 */
117 107 static struct modlfs modlfs = {
118 108 &mod_fsops, "filesystem for tmpfs", &vfw
119 109 };
120 110
121 111 static struct modlinkage modlinkage = {
122 112 MODREV_1, &modlfs, NULL
123 113 };
124 114
125 115 int
|
↓ open down ↓ |
38 lines elided |
↑ open up ↑ |
126 116 _init()
127 117 {
128 118 return (mod_install(&modlinkage));
129 119 }
130 120
131 121 int
132 122 _fini()
133 123 {
134 124 int error;
135 125
136 - /*
137 - * If a forceably unmounted instance is still hanging around, we cannot
138 - * allow the module to be unloaded because that would cause panics once
139 - * the VFS framework decides it's time to call into VFS_FREEVFS().
140 - */
141 - if (tmpfs_mountcount)
142 - return (EBUSY);
143 -
144 126 error = mod_remove(&modlinkage);
145 127 if (error)
146 128 return (error);
147 129 /*
148 130 * Tear down the operations vectors
149 131 */
150 132 (void) vfs_freevfsops_by_type(tmpfsfstype);
151 133 vn_freevnodeops(tmp_vnodeops);
152 134 return (0);
153 135 }
154 136
155 137 int
156 138 _info(struct modinfo *modinfop)
157 139 {
158 140 return (mod_info(&modlinkage, modinfop));
159 141 }
160 142
161 143 /*
144 + * The following are patchable variables limiting the amount of system
145 + * resources tmpfs can use.
146 + *
147 + * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
 148 + * tmpfs can use for its data structures (e.g. tmpnodes, directory entries)
149 + * It is not determined by setting a hard limit but rather as a percentage of
150 + * physical memory which is determined when tmpfs is first used in the system.
151 + *
162 152 * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
163 153 * the rest of the system. In other words, if the amount of free swap space
164 154 * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
165 155 * anon allocations will fail.
166 156 *
167 157 * There is also a per mount limit on the amount of swap space
168 158 * (tmount.tm_anonmax) settable via a mount option.
169 159 */
160 +size_t tmpfs_maxkmem = 0;
170 161 size_t tmpfs_minfree = 0;
162 +size_t tmp_kmemspace; /* bytes of kernel heap used by all tmpfs */
171 163
172 164 static major_t tmpfs_major;
173 165 static minor_t tmpfs_minor;
174 166 static kmutex_t tmpfs_minor_lock;
175 167
176 168 /*
177 169 * initialize global tmpfs locks and such
178 170 * called when loading tmpfs module
179 171 */
180 172 static int
181 173 tmpfsinit(int fstype, char *name)
182 174 {
183 175 static const fs_operation_def_t tmp_vfsops_template[] = {
184 176 VFSNAME_MOUNT, { .vfs_mount = tmp_mount },
185 177 VFSNAME_UNMOUNT, { .vfs_unmount = tmp_unmount },
186 178 VFSNAME_ROOT, { .vfs_root = tmp_root },
187 179 VFSNAME_STATVFS, { .vfs_statvfs = tmp_statvfs },
188 180 VFSNAME_VGET, { .vfs_vget = tmp_vget },
189 - VFSNAME_FREEVFS, { .vfs_freevfs = tmp_freevfs },
190 181 NULL, NULL
191 182 };
192 183 int error;
193 184 extern void tmpfs_hash_init();
194 185
195 186 tmpfs_hash_init();
196 187 tmpfsfstype = fstype;
197 188 ASSERT(tmpfsfstype != 0);
198 189
199 190 error = vfs_setfsops(fstype, tmp_vfsops_template, NULL);
200 191 if (error != 0) {
201 192 cmn_err(CE_WARN, "tmpfsinit: bad vfs ops template");
202 193 return (error);
203 194 }
204 195
205 196 error = vn_make_ops(name, tmp_vnodeops_template, &tmp_vnodeops);
206 197 if (error != 0) {
207 198 (void) vfs_freevfsops_by_type(fstype);
208 199 cmn_err(CE_WARN, "tmpfsinit: bad vnode ops template");
209 200 return (error);
210 201 }
211 202
212 203 /*
213 204 * tmpfs_minfree doesn't need to be some function of configured
|
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
214 205 * swap space since it really is an absolute limit of swap space
215 206 * which still allows other processes to execute.
216 207 */
217 208 if (tmpfs_minfree == 0) {
218 209 /*
219 210 * Set if not patched
220 211 */
221 212 tmpfs_minfree = btopr(TMPMINFREE);
222 213 }
223 214
215 + /*
 216 + * The maximum amount of kernel memory tmpfs can allocate is
 217 + * 1/TMPMAXFRACKMEM of the memory available to the kernel
218 + */
219 + if (tmpfs_maxkmem == 0)
220 + tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
221 +
224 222 if ((tmpfs_major = getudev()) == (major_t)-1) {
225 223 cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
226 224 tmpfs_major = 0;
227 225 }
228 226 mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
229 - tmpfs_mountcount = 0;
230 227 return (0);
231 228 }
232 229
233 230 static int
234 231 tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
235 232 {
236 233 struct tmount *tm = NULL;
237 234 struct tmpnode *tp;
238 235 struct pathname dpn;
239 236 int error;
240 - size_t anonmax;
237 + pgcnt_t anonmax;
241 238 struct vattr rattr;
242 239 int got_attrs;
243 240 boolean_t mode_arg = B_FALSE;
244 241 mode_t root_mode = 0777;
245 242 char *argstr;
246 243
247 244 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
248 245 return (error);
249 246
250 247 if (mvp->v_type != VDIR)
251 248 return (ENOTDIR);
252 249
253 250 mutex_enter(&mvp->v_lock);
254 251 if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
255 252 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
256 253 mutex_exit(&mvp->v_lock);
257 254 return (EBUSY);
258 255 }
259 256 mutex_exit(&mvp->v_lock);
260 257
261 258 /*
262 259 * Having the resource be anything but "swap" doesn't make sense.
263 260 */
264 261 vfs_setresource(vfsp, "swap", 0);
265 262
266 263 /*
267 264 * now look for options we understand...
268 265 */
269 266
270 267 /* tmpfs doesn't support read-only mounts */
271 268 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
272 269 error = EINVAL;
273 270 goto out;
|
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
274 271 }
275 272
276 273 /*
277 274 * tm_anonmax is set according to the mount arguments
278 275 * if any. Otherwise, it is set to a maximum value.
279 276 */
280 277 if (vfs_optionisset(vfsp, "size", &argstr)) {
281 278 if ((error = tmp_convnum(argstr, &anonmax)) != 0)
282 279 goto out;
283 280 } else {
284 - anonmax = SIZE_MAX;
281 + anonmax = ULONG_MAX;
285 282 }
286 283
287 284 /*
288 285 * The "mode" mount argument allows the operator to override the
289 286 * permissions of the root of the tmpfs mount.
290 287 */
291 288 if (vfs_optionisset(vfsp, "mode", &argstr)) {
292 289 if ((error = tmp_convmode(argstr, &root_mode)) != 0) {
293 290 goto out;
294 291 }
295 292 mode_arg = B_TRUE;
296 293 }
297 294
298 295 if (error = pn_get(uap->dir,
299 296 (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
300 297 goto out;
301 298
302 299 if (uap->flags & MS_REMOUNT) {
303 300 tm = (struct tmount *)VFSTOTM(vfsp);
304 301
305 302 /*
306 303 * If we change the size so its less than what is currently
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
307 304 * being used, we allow that. The file system will simply be
308 305 * full until enough files have been removed to get below the
309 306 * new max.
310 307 */
311 308 mutex_enter(&tm->tm_contents);
312 309 tm->tm_anonmax = anonmax;
313 310 mutex_exit(&tm->tm_contents);
314 311 goto out;
315 312 }
316 313
317 - if ((tm = kmem_zalloc(sizeof (struct tmount),
318 - KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
314 + if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
319 315 pn_free(&dpn);
320 316 error = ENOMEM;
321 317 goto out;
322 318 }
323 319
324 320 /*
325 321 * find an available minor device number for this mount
326 322 */
327 323 mutex_enter(&tmpfs_minor_lock);
328 324 do {
329 325 tmpfs_minor = (tmpfs_minor + 1) & L_MAXMIN32;
330 326 tm->tm_dev = makedevice(tmpfs_major, tmpfs_minor);
331 327 } while (vfs_devismounted(tm->tm_dev));
332 328 mutex_exit(&tmpfs_minor_lock);
333 329
334 330 /*
335 331 * Set but don't bother entering the mutex
336 332 * (tmount not on mount list yet)
337 333 */
338 334 mutex_init(&tm->tm_contents, NULL, MUTEX_DEFAULT, NULL);
339 335 mutex_init(&tm->tm_renamelck, NULL, MUTEX_DEFAULT, NULL);
|
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
340 336
341 337 tm->tm_vfsp = vfsp;
342 338 tm->tm_anonmax = anonmax;
343 339
344 340 vfsp->vfs_data = (caddr_t)tm;
345 341 vfsp->vfs_fstype = tmpfsfstype;
346 342 vfsp->vfs_dev = tm->tm_dev;
347 343 vfsp->vfs_bsize = PAGESIZE;
348 344 vfsp->vfs_flag |= VFS_NOTRUNC;
349 345 vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
350 - tm->tm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
346 + tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
351 347 (void) strcpy(tm->tm_mntpath, dpn.pn_path);
352 348
353 349 /*
354 - * Preemptively set vfs_zone before any of the tmp_kmem_* functions are
355 - * called. That field is not populated until after a successful
356 - * VFS_MOUNT when domount() sets vfsp metadata via vfs_add(). An
357 - * accurate value is required for proper swap usage accounting.
358 - */
359 - ASSERT0(uap->flags & MS_REMOUNT);
360 - ASSERT(vfsp->vfs_zone == NULL);
361 - vfsp->vfs_zone = curproc->p_zone;
362 -
363 - /*
364 350 * allocate and initialize root tmpnode structure
365 351 */
366 352 bzero(&rattr, sizeof (struct vattr));
367 353 rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
368 354 rattr.va_type = VDIR;
369 355 rattr.va_rdev = 0;
370 - tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP);
371 - if (tp == NULL) {
372 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
373 - mutex_destroy(&tm->tm_contents);
374 - mutex_destroy(&tm->tm_renamelck);
375 - kmem_free(tm, sizeof (struct tmount));
376 -
377 - pn_free(&dpn);
378 - error = ENOMEM;
379 - goto out;
380 - }
356 + tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
381 357 tmpnode_init(tm, tp, &rattr, cr);
382 358
383 359 /*
384 360 * Get the mode, uid, and gid from the underlying mount point.
385 361 */
386 362 rattr.va_mask = AT_MODE|AT_UID|AT_GID; /* Hint to getattr */
387 363 got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
388 364
389 365 rw_enter(&tp->tn_rwlock, RW_WRITER);
390 366 TNTOV(tp)->v_flag |= VROOT;
391 367
392 368 /*
393 369 * If the getattr succeeded, use its results. Otherwise allow
394 370 * the previously set hardwired defaults to prevail.
395 371 */
396 372 if (got_attrs == 0) {
397 373 if (!mode_arg) {
398 374 /*
399 375 * Only use the underlying mount point for the
400 376 * mode if the "mode" mount argument was not
401 377 * provided.
402 378 */
403 379 tp->tn_mode = rattr.va_mode;
404 380 }
405 381 tp->tn_uid = rattr.va_uid;
406 382 tp->tn_gid = rattr.va_gid;
407 383 }
408 384
|
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
409 385 /*
410 386 * initialize linked list of tmpnodes so that the back pointer of
411 387 * the root tmpnode always points to the last one on the list
412 388 * and the forward pointer of the last node is null
413 389 */
414 390 tp->tn_back = tp;
415 391 tp->tn_forw = NULL;
416 392 tp->tn_nlink = 0;
417 393 tm->tm_rootnode = tp;
418 394
419 - if (tdirinit(tp, tp) != 0) {
420 - /*
421 - * While we would normally let our VOP_INACTIVE function take
422 - * care of cleaning up here, we're in a bit of a delicate
423 - * situation, so we do so manually. While it's tempting to try
424 - * and rely upon tmpfs_freevfs() and others, it's probably safer
425 - * for the time to do this manually at the cost of duplication.
426 - */
427 - vn_invalid(TNTOV(tp));
428 - rw_destroy(&tp->tn_rwlock);
429 - mutex_destroy(&tp->tn_tlock);
430 - vn_free(TNTOV(tp));
431 - tmp_kmem_free(tm, tp, sizeof (struct tmpnode));
395 + tdirinit(tp, tp);
432 396
433 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
434 - mutex_destroy(&tm->tm_contents);
435 - mutex_destroy(&tm->tm_renamelck);
436 - kmem_free(tm, sizeof (struct tmount));
437 - pn_free(&dpn);
438 - error = ENOMEM;
439 - goto out;
440 - }
441 -
442 397 rw_exit(&tp->tn_rwlock);
443 398
444 399 pn_free(&dpn);
445 400 error = 0;
446 - atomic_inc_32(&tmpfs_mountcount);
447 401
448 402 out:
449 403 if (error == 0)
450 404 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
451 405
452 406 return (error);
453 407 }
454 408
455 409 static int
456 410 tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
457 411 {
458 412 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
459 413 struct tmpnode *tnp, *cancel;
460 414 struct vnode *vp;
461 415 int error;
462 - uint_t cnt;
463 - int i;
464 416
465 417 if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
466 418 return (error);
467 419
420 + /*
421 + * forced unmount is not supported by this file system
422 + * and thus, ENOTSUP, is being returned.
423 + */
424 + if (flag & MS_FORCE)
425 + return (ENOTSUP);
426 +
468 427 mutex_enter(&tm->tm_contents);
469 428
470 429 /*
471 - * In the normal unmount case (non-forced unmount), if there are no
472 - * open files, only the root node should have a reference count.
473 - *
430 + * If there are no open files, only the root node should have
431 + * a reference count.
474 432 * With tm_contents held, nothing can be added or removed.
475 433 * There may be some dirty pages. To prevent fsflush from
476 434 * disrupting the unmount, put a hold on each node while scanning.
477 435 * If we find a previously referenced node, undo the holds we have
478 436 * placed and fail EBUSY.
479 - *
480 - * However, in the case of a forced umount, things are a bit different.
481 - * An additional VFS_HOLD is added for each outstanding VN_HOLD to
482 - * ensure that the file system is not cleaned up (tmp_freevfs) until
483 - * the last vfs hold is dropped. This happens in tmp_inactive as the
484 - * vnodes are released. Also, we can't add an additional VN_HOLD in
485 - * this case since that would prevent tmp_inactive from ever being
486 - * called. Finally, we do need to drop the zone ref now (zone_rele_ref)
487 - * so that the zone is not blocked waiting for the final file system
488 - * cleanup.
489 437 */
490 438 tnp = tm->tm_rootnode;
491 -
492 - vp = TNTOV(tnp);
493 - mutex_enter(&vp->v_lock);
494 - cnt = vp->v_count;
495 - if (flag & MS_FORCE) {
496 - vfsp->vfs_flag |= VFS_UNMOUNTED;
497 - /* Extra hold which we rele below when we drop the zone ref */
498 - VFS_HOLD(vfsp);
499 -
500 - for (i = 1; i < cnt; i++)
501 - VFS_HOLD(vfsp);
502 -
503 - /* drop the mutex now because no one can find this mount */
439 + if (TNTOV(tnp)->v_count > 1) {
504 440 mutex_exit(&tm->tm_contents);
505 - } else if (cnt > 1) {
506 - mutex_exit(&vp->v_lock);
507 - mutex_exit(&tm->tm_contents);
508 441 return (EBUSY);
509 442 }
510 - mutex_exit(&vp->v_lock);
511 443
512 - /*
513 - * Check for open files. An open file causes everything to unwind
514 - * unless this is a forced umount.
515 - */
516 444 for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
517 - vp = TNTOV(tnp);
518 - mutex_enter(&vp->v_lock);
519 - cnt = vp->v_count;
520 - if (flag & MS_FORCE) {
521 - for (i = 0; i < cnt; i++)
522 - VFS_HOLD(vfsp);
523 -
524 - /*
525 - * In the case of a forced umount don't add an
526 - * additional VN_HOLD on the already held vnodes, like
527 - * we do in the non-forced unmount case. If the
528 - * cnt > 0, then the vnode already has at least one
529 - * hold and we need tmp_inactive to get called when the
530 - * last pre-existing hold on the node is released so
531 - * that we can VFS_RELE the VFS holds we just added.
532 - */
533 - if (cnt == 0) {
534 - /* directly add VN_HOLD since have the lock */
535 - vp->v_count++;
536 - }
537 -
538 - mutex_exit(&vp->v_lock);
539 -
540 - /*
541 - * If the tmpnode has any pages associated with it
542 - * (i.e. if it's a normal file with non-zero size), the
543 - * tmpnode could still be discovered by pageout or
544 - * fsflush via the page vnode pointers. To prevent this
545 - * from interfering with the tmp_freevfs, truncate the
546 - * tmpnode now.
547 - */
548 - if (tnp->tn_size != 0 && tnp->tn_type == VREG) {
549 - rw_enter(&tnp->tn_rwlock, RW_WRITER);
550 - rw_enter(&tnp->tn_contents, RW_WRITER);
551 -
552 - (void) tmpnode_trunc(tm, tnp, 0);
553 -
554 - rw_exit(&tnp->tn_contents);
555 - rw_exit(&tnp->tn_rwlock);
556 -
557 - ASSERT(tnp->tn_size == 0);
558 - ASSERT(tnp->tn_nblocks == 0);
559 - }
560 - } else if (cnt > 0) {
561 - /* An open file; unwind the holds we've been adding. */
562 - mutex_exit(&vp->v_lock);
445 + if ((vp = TNTOV(tnp))->v_count > 0) {
563 446 cancel = tm->tm_rootnode->tn_forw;
564 447 while (cancel != tnp) {
565 448 vp = TNTOV(cancel);
566 449 ASSERT(vp->v_count > 0);
567 450 VN_RELE(vp);
568 451 cancel = cancel->tn_forw;
569 452 }
570 453 mutex_exit(&tm->tm_contents);
571 454 return (EBUSY);
572 - } else {
573 - /* directly add a VN_HOLD since we have the lock */
574 - vp->v_count++;
575 - mutex_exit(&vp->v_lock);
576 455 }
456 + VN_HOLD(vp);
577 457 }
578 458
579 - if (flag & MS_FORCE) {
580 - /*
581 - * Drop the zone ref now since we don't know how long it will
582 - * be until the final vfs_rele is called by tmp_inactive.
583 - */
584 - if (vfsp->vfs_zone) {
585 - zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
586 - ZONE_REF_VFS);
587 - vfsp->vfs_zone = 0;
588 - }
589 - /* We can now drop the extra hold we added above. */
590 - VFS_RELE(vfsp);
591 - } else {
592 - /*
593 - * For the non-forced case, we can drop the mutex now because
594 - * no one can find this mount anymore
595 - */
596 - vfsp->vfs_flag |= VFS_UNMOUNTED;
597 - mutex_exit(&tm->tm_contents);
598 - }
459 + /*
460 + * We can drop the mutex now because no one can find this mount
461 + */
462 + mutex_exit(&tm->tm_contents);
599 463
600 - return (0);
601 -}
602 -
603 -/*
604 - * Implementation of VFS_FREEVFS() to support forced umounts. This is called by
605 - * the vfs framework after umount and the last VFS_RELE, to trigger the release
606 - * of any resources still associated with the given vfs_t. We only add
607 - * additional VFS_HOLDs during the forced umount case, so this is normally
608 - * called immediately after tmp_umount.
609 - */
610 -void
611 -tmp_freevfs(vfs_t *vfsp)
612 -{
613 - struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
614 - struct tmpnode *tnp;
615 - struct vnode *vp;
616 -
617 464 /*
618 465 * Free all kmemalloc'd and anonalloc'd memory associated with
619 466 * this filesystem. To do this, we go through the file list twice,
620 467 * once to remove all the directory entries, and then to remove
621 468 * all the files. We do this because there is useful code in
622 469 * tmpnode_free which assumes that the directory entry has been
623 470 * removed before the file.
624 471 */
625 -
626 472 /*
627 - * Now that we are tearing ourselves down we need to remove the
628 - * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
629 - * files from the system causing us to have a negative value. Doing this
630 - * seems a bit better than trying to set a flag on the tmount that says
631 - * we're tearing down.
632 - */
633 - vfsp->vfs_flag &= ~VFS_UNMOUNTED;
634 -
635 - /*
636 473 * Remove all directory entries
637 474 */
638 475 for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) {
639 476 rw_enter(&tnp->tn_rwlock, RW_WRITER);
640 477 if (tnp->tn_type == VDIR)
641 478 tdirtrunc(tnp);
642 479 if (tnp->tn_vnode->v_flag & V_XATTRDIR) {
643 480 /*
644 481 * Account for implicit attrdir reference.
645 482 */
646 483 ASSERT(tnp->tn_nlink > 0);
647 484 DECR_COUNT(&tnp->tn_nlink, &tnp->tn_tlock);
648 485 }
649 486 rw_exit(&tnp->tn_rwlock);
650 487 }
651 488
652 489 ASSERT(tm->tm_rootnode);
653 490
654 491 /*
655 492 * All links are gone, v_count is keeping nodes in place.
656 493 * VN_RELE should make the node disappear, unless somebody
657 494 * is holding pages against it. Nap and retry until it disappears.
658 495 *
659 496 * We re-acquire the lock to prevent others who have a HOLD on
660 497 * a tmpnode via its pages or anon slots from blowing it away
661 498 * (in tmp_inactive) while we're trying to get to it here. Once
662 499 * we have a HOLD on it we know it'll stick around.
663 500 *
664 501 */
665 502 mutex_enter(&tm->tm_contents);
666 503 /*
667 504 * Remove all the files (except the rootnode) backwards.
668 505 */
669 506 while ((tnp = tm->tm_rootnode->tn_back) != tm->tm_rootnode) {
670 507 mutex_exit(&tm->tm_contents);
671 508 /*
672 509 * Inhibit tmp_inactive from touching attribute directory
673 510 * as all nodes will be released here.
674 511 * Note we handled the link count in pass 2 above.
675 512 */
676 513 rw_enter(&tnp->tn_rwlock, RW_WRITER);
677 514 tnp->tn_xattrdp = NULL;
678 515 rw_exit(&tnp->tn_rwlock);
679 516 vp = TNTOV(tnp);
680 517 VN_RELE(vp);
681 518 mutex_enter(&tm->tm_contents);
682 519 /*
683 520 * It's still there after the RELE. Someone else like pageout
684 521 * has a hold on it so wait a bit and then try again - we know
685 522 * they'll give it up soon.
686 523 */
687 524 if (tnp == tm->tm_rootnode->tn_back) {
688 525 VN_HOLD(vp);
689 526 mutex_exit(&tm->tm_contents);
690 527 delay(hz / 4);
|
↓ open down ↓ |
45 lines elided |
↑ open up ↑ |
691 528 mutex_enter(&tm->tm_contents);
692 529 }
693 530 }
694 531 mutex_exit(&tm->tm_contents);
695 532
696 533 tm->tm_rootnode->tn_xattrdp = NULL;
697 534 VN_RELE(TNTOV(tm->tm_rootnode));
698 535
699 536 ASSERT(tm->tm_mntpath);
700 537
701 - kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
538 + tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
702 539
703 540 ASSERT(tm->tm_anonmem == 0);
704 541
705 542 mutex_destroy(&tm->tm_contents);
706 543 mutex_destroy(&tm->tm_renamelck);
707 - kmem_free(tm, sizeof (struct tmount));
544 + tmp_memfree(tm, sizeof (struct tmount));
708 545
709 - /* Allow _fini() to succeed now */
710 - atomic_dec_32(&tmpfs_mountcount);
546 + return (0);
711 547 }
712 548
713 549 /*
714 550 * return root tmpnode for given vnode
715 551 */
716 552 static int
717 553 tmp_root(struct vfs *vfsp, struct vnode **vpp)
718 554 {
719 555 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
720 556 struct tmpnode *tp = tm->tm_rootnode;
721 557 struct vnode *vp;
722 558
723 559 ASSERT(tp);
724 560
725 561 vp = TNTOV(tp);
726 562 VN_HOLD(vp);
727 563 *vpp = vp;
728 564 return (0);
729 565 }
730 566
731 567 static int
732 568 tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
733 569 {
734 570 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
735 571 ulong_t blocks;
736 572 dev32_t d32;
737 573 zoneid_t eff_zid;
738 574 struct zone *zp;
739 575
740 576 /*
741 577 * The file system may have been mounted by the global zone on
742 578 * behalf of the non-global zone. In that case, the tmount zone_id
743 579 * will be the global zone. We still want to show the swap cap inside
744 580 * the zone in this case, even though the file system was mounted by
745 581 * the global zone.
746 582 */
747 583 if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
748 584 zp = curproc->p_zone;
749 585 else
750 586 zp = tm->tm_vfsp->vfs_zone;
751 587
752 588 if (zp == NULL)
753 589 eff_zid = GLOBAL_ZONEUNIQID;
754 590 else
755 591 eff_zid = zp->zone_id;
756 592
757 593 sbp->f_bsize = PAGESIZE;
758 594 sbp->f_frsize = PAGESIZE;
759 595
760 596 /*
761 597 * Find the amount of available physical and memory swap
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
762 598 */
763 599 mutex_enter(&anoninfo_lock);
764 600 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
765 601 blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
766 602 mutex_exit(&anoninfo_lock);
767 603
768 604 /*
769 605 * If tm_anonmax for this mount is less than the available swap space
770 606 * (minus the amount tmpfs can't use), use that instead
771 607 */
772 - if (blocks > tmpfs_minfree && tm->tm_anonmax > tm->tm_anonmem) {
608 + if (blocks > tmpfs_minfree)
773 609 sbp->f_bfree = MIN(blocks - tmpfs_minfree,
774 - btop(tm->tm_anonmax) - btopr(tm->tm_anonmem));
775 - } else {
610 + tm->tm_anonmax - tm->tm_anonmem);
611 + else
776 612 sbp->f_bfree = 0;
777 - }
778 613
779 614 sbp->f_bavail = sbp->f_bfree;
780 615
781 616 /*
782 617 * Total number of blocks is what's available plus what's been used
783 618 */
784 - sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + btopr(tm->tm_anonmem));
619 + sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem);
785 620
786 621 if (eff_zid != GLOBAL_ZONEUNIQID &&
787 622 zp->zone_max_swap_ctl != UINT64_MAX) {
788 623 /*
789 624 * If the fs is used by a non-global zone with a swap cap,
790 625 * then report the capped size.
791 626 */
792 627 rctl_qty_t cap, used;
793 628 pgcnt_t pgcap, pgused;
794 629
795 630 mutex_enter(&zp->zone_mem_lock);
796 631 cap = zp->zone_max_swap_ctl;
797 632 used = zp->zone_max_swap;
798 633 mutex_exit(&zp->zone_mem_lock);
799 634
800 635 pgcap = btop(cap);
801 636 pgused = btop(used);
802 637
803 638 sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
|
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
804 639 sbp->f_bavail = sbp->f_bfree;
805 640 sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
806 641 }
807 642
808 643 /*
809 644 * The maximum number of files available is approximately the number
810 645 * of tmpnodes we can allocate from the remaining kernel memory
811 646 * available to tmpfs. This is fairly inaccurate since it doesn't
812 647 * take into account the names stored in the directory entries.
813 648 */
814 - sbp->f_ffree = sbp->f_files = ptob(availrmem) /
649 + if (tmpfs_maxkmem > tmp_kmemspace)
650 + sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
651 + (sizeof (struct tmpnode) + sizeof (struct tdirent));
652 + else
653 + sbp->f_ffree = 0;
654 +
655 + sbp->f_files = tmpfs_maxkmem /
815 656 (sizeof (struct tmpnode) + sizeof (struct tdirent));
816 657 sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
817 658 (void) cmpldev(&d32, vfsp->vfs_dev);
818 659 sbp->f_fsid = d32;
819 660 (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name);
820 661 (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));
821 662 /*
822 663 * ensure null termination
823 664 */
824 665 sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
825 666 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
826 667 sbp->f_namemax = MAXNAMELEN - 1;
827 668 return (0);
828 669 }
829 670
830 671 static int
831 672 tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
832 673 {
833 674 struct tfid *tfid;
834 675 struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
835 676 struct tmpnode *tp = NULL;
836 677
837 678 tfid = (struct tfid *)fidp;
838 679 *vpp = NULL;
839 680
840 681 mutex_enter(&tm->tm_contents);
841 682 for (tp = tm->tm_rootnode; tp; tp = tp->tn_forw) {
842 683 mutex_enter(&tp->tn_tlock);
843 684 if (tp->tn_nodeid == tfid->tfid_ino) {
844 685 /*
845 686 * If the gen numbers don't match we know the
846 687 * file won't be found since only one tmpnode
847 688 * can have this number at a time.
848 689 */
849 690 if (tp->tn_gen != tfid->tfid_gen || tp->tn_nlink == 0) {
850 691 mutex_exit(&tp->tn_tlock);
851 692 mutex_exit(&tm->tm_contents);
852 693 return (0);
853 694 }
854 695 *vpp = (struct vnode *)TNTOV(tp);
855 696
856 697 VN_HOLD(*vpp);
857 698
858 699 if ((tp->tn_mode & S_ISVTX) &&
859 700 !(tp->tn_mode & (S_IXUSR | S_IFDIR))) {
860 701 mutex_enter(&(*vpp)->v_lock);
861 702 (*vpp)->v_flag |= VISSWAP;
862 703 mutex_exit(&(*vpp)->v_lock);
863 704 }
864 705 mutex_exit(&tp->tn_tlock);
865 706 mutex_exit(&tm->tm_contents);
866 707 return (0);
867 708 }
868 709 mutex_exit(&tp->tn_tlock);
869 710 }
870 711 mutex_exit(&tm->tm_contents);
871 712 return (0);
872 713 }
|
↓ open down ↓ |
48 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX