Print this page
    
OS-4043 tmpfs should support gigabyte sizes
OS-4044 tmpfs should support "mode" option
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
          +++ new/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2011, Joyent, Inc. All rights reserved.
       23 + * Copyright 2015 Joyent, Inc.
  24   24   */
  25   25  
  26   26  #include <sys/types.h>
  27   27  #include <sys/param.h>
  28   28  #include <sys/sysmacros.h>
  29   29  #include <sys/kmem.h>
  30   30  #include <sys/time.h>
  31   31  #include <sys/pathname.h>
  32   32  #include <sys/vfs.h>
  33   33  #include <sys/vfs_opreg.h>
  34   34  #include <sys/vnode.h>
  35   35  #include <sys/stat.h>
  36   36  #include <sys/uio.h>
  37   37  #include <sys/stat.h>
  38   38  #include <sys/errno.h>
  39   39  #include <sys/cmn_err.h>
  40   40  #include <sys/cred.h>
  41   41  #include <sys/statvfs.h>
  42   42  #include <sys/mount.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/systm.h>
  45   45  #include <sys/mntent.h>
  46   46  #include <fs/fs_subr.h>
  47   47  #include <vm/page.h>
  48   48  #include <vm/anon.h>
  49   49  #include <sys/model.h>
  50   50  #include <sys/policy.h>
  51   51  
  52   52  #include <sys/fs/swapnode.h>
  53   53  #include <sys/fs/tmp.h>
  54   54  #include <sys/fs/tmpnode.h>
  55   55  
  56   56  static int tmpfsfstype;
  57   57  
  58   58  /*
  59   59   * tmpfs vfs operations.
  60   60   */
  61   61  static int tmpfsinit(int, char *);
  62   62  static int tmp_mount(struct vfs *, struct vnode *,
  63   63          struct mounta *, struct cred *);
  64   64  static int tmp_unmount(struct vfs *, int, struct cred *);
  65   65  static int tmp_root(struct vfs *, struct vnode **);
  66   66  static int tmp_statvfs(struct vfs *, struct statvfs64 *);
  67   67  static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
  68   68  
  69   69  /*
  70   70   * Loadable module wrapper
  71   71   */
  72   72  #include <sys/modctl.h>
  73   73  
  74   74  static mntopts_t tmpfs_proto_opttbl;
  75   75  
  76   76  static vfsdef_t vfw = {
  77   77          VFSDEF_VERSION,
  78   78          "tmpfs",
  79   79          tmpfsinit,
  80   80          VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
  81   81          &tmpfs_proto_opttbl
  82   82  };
  83   83  
  
    | 
      ↓ open down ↓ | 
    50 lines elided | 
    
      ↑ open up ↑ | 
  
  84   84  /*
  85   85   * in-kernel mnttab options
  86   86   */
  87   87  static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
  88   88  static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
  89   89  
  90   90  static mntopt_t tmpfs_options[] = {
  91   91          /* Option name          Cancel Opt      Arg     Flags           Data */
  92   92          { MNTOPT_XATTR,         xattr_cancel,   NULL,   MO_DEFAULT,     NULL},
  93   93          { MNTOPT_NOXATTR,       noxattr_cancel, NULL,   NULL,           NULL},
  94      -        { "size",               NULL,           "0",    MO_HASVALUE,    NULL}
       94 +        { "size",               NULL,           "0",    MO_HASVALUE,    NULL},
       95 +        { "mode",               NULL,           NULL,   MO_HASVALUE,    NULL}
  95   96  };
  96   97  
  97   98  
  98   99  static mntopts_t tmpfs_proto_opttbl = {
  99  100          sizeof (tmpfs_options) / sizeof (mntopt_t),
 100  101          tmpfs_options
 101  102  };
 102  103  
 103  104  /*
 104  105   * Module linkage information
 105  106   */
 106  107  static struct modlfs modlfs = {
 107  108          &mod_fsops, "filesystem for tmpfs", &vfw
 108  109  };
 109  110  
 110  111  static struct modlinkage modlinkage = {
 111  112          MODREV_1, &modlfs, NULL
 112  113  };
 113  114  
 114  115  int
 115  116  _init()
 116  117  {
 117  118          return (mod_install(&modlinkage));
 118  119  }
 119  120  
 120  121  int
 121  122  _fini()
 122  123  {
 123  124          int error;
 124  125  
 125  126          error = mod_remove(&modlinkage);
 126  127          if (error)
 127  128                  return (error);
 128  129          /*
 129  130           * Tear down the operations vectors
 130  131           */
 131  132          (void) vfs_freevfsops_by_type(tmpfsfstype);
 132  133          vn_freevnodeops(tmp_vnodeops);
 133  134          return (0);
 134  135  }
 135  136  
 136  137  int
 137  138  _info(struct modinfo *modinfop)
 138  139  {
 139  140          return (mod_info(&modlinkage, modinfop));
 140  141  }
 141  142  
 142  143  /*
 143  144   * The following are patchable variables limiting the amount of system
 144  145   * resources tmpfs can use.
 145  146   *
 146  147   * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
 147  148   * tmpfs can use for its data structures (e.g. tmpnodes, directory entries)
 148  149   * It is not determined by setting a hard limit but rather as a percentage of
 149  150   * physical memory which is determined when tmpfs is first used in the system.
 150  151   *
 151  152   * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
 152  153   * the rest of the system.  In other words, if the amount of free swap space
 153  154   * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
 154  155   * anon allocations will fail.
 155  156   *
 156  157   * There is also a per mount limit on the amount of swap space
 157  158   * (tmount.tm_anonmax) settable via a mount option.
 158  159   */
 159  160  size_t tmpfs_maxkmem = 0;
 160  161  size_t tmpfs_minfree = 0;
 161  162  size_t tmp_kmemspace;           /* bytes of kernel heap used by all tmpfs */
 162  163  
 163  164  static major_t tmpfs_major;
 164  165  static minor_t tmpfs_minor;
 165  166  static kmutex_t tmpfs_minor_lock;
 166  167  
 167  168  /*
 168  169   * initialize global tmpfs locks and such
 169  170   * called when loading tmpfs module
 170  171   */
 171  172  static int
 172  173  tmpfsinit(int fstype, char *name)
 173  174  {
 174  175          static const fs_operation_def_t tmp_vfsops_template[] = {
 175  176                  VFSNAME_MOUNT,          { .vfs_mount = tmp_mount },
 176  177                  VFSNAME_UNMOUNT,        { .vfs_unmount = tmp_unmount },
 177  178                  VFSNAME_ROOT,           { .vfs_root = tmp_root },
 178  179                  VFSNAME_STATVFS,        { .vfs_statvfs = tmp_statvfs },
 179  180                  VFSNAME_VGET,           { .vfs_vget = tmp_vget },
 180  181                  NULL,                   NULL
 181  182          };
 182  183          int error;
 183  184          extern  void    tmpfs_hash_init();
 184  185  
 185  186          tmpfs_hash_init();
 186  187          tmpfsfstype = fstype;
 187  188          ASSERT(tmpfsfstype != 0);
 188  189  
 189  190          error = vfs_setfsops(fstype, tmp_vfsops_template, NULL);
 190  191          if (error != 0) {
 191  192                  cmn_err(CE_WARN, "tmpfsinit: bad vfs ops template");
 192  193                  return (error);
 193  194          }
 194  195  
 195  196          error = vn_make_ops(name, tmp_vnodeops_template, &tmp_vnodeops);
 196  197          if (error != 0) {
 197  198                  (void) vfs_freevfsops_by_type(fstype);
 198  199                  cmn_err(CE_WARN, "tmpfsinit: bad vnode ops template");
 199  200                  return (error);
 200  201          }
 201  202  
 202  203          /*
 203  204           * tmpfs_minfree doesn't need to be some function of configured
 204  205           * swap space since it really is an absolute limit of swap space
 205  206           * which still allows other processes to execute.
 206  207           */
 207  208          if (tmpfs_minfree == 0) {
 208  209                  /*
 209  210                   * Set if not patched
 210  211                   */
 211  212                  tmpfs_minfree = btopr(TMPMINFREE);
 212  213          }
 213  214  
 214  215          /*
 215  216           * The maximum amount of kernel memory tmpfs can allocate is
 216  217           * 1/TMPMAXFRACKMEM of kmem_maxavail(), but never less than PAGESIZE.
 217  218           */
 218  219          if (tmpfs_maxkmem == 0)
 219  220                  tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
  
    | 
      ↓ open down ↓ | 
    115 lines elided | 
    
      ↑ open up ↑ | 
  
 220  221  
 221  222          if ((tmpfs_major = getudev()) == (major_t)-1) {
 222  223                  cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
 223  224                  tmpfs_major = 0;
 224  225          }
 225  226          mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
 226  227          return (0);
 227  228  }
 228  229  
 229  230  static int
 230      -tmp_mount(
 231      -        struct vfs *vfsp,
 232      -        struct vnode *mvp,
 233      -        struct mounta *uap,
 234      -        struct cred *cr)
      231 +tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 235  232  {
 236  233          struct tmount *tm = NULL;
 237  234          struct tmpnode *tp;
 238  235          struct pathname dpn;
 239  236          int error;
 240  237          pgcnt_t anonmax;
 241  238          struct vattr rattr;
 242  239          int got_attrs;
      240 +        boolean_t mode_arg = B_FALSE;
      241 +        mode_t root_mode = 0777;
      242 +        char *argstr;
 243  243  
 244      -        char *sizestr;
 245      -
 246  244          if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 247  245                  return (error);
 248  246  
 249  247          if (mvp->v_type != VDIR)
 250  248                  return (ENOTDIR);
 251  249  
 252  250          mutex_enter(&mvp->v_lock);
 253  251          if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
 254  252              (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 255  253                  mutex_exit(&mvp->v_lock);
 256  254                  return (EBUSY);
 257  255          }
 258  256          mutex_exit(&mvp->v_lock);
 259  257  
 260  258          /*
 261  259           * Having the resource be anything but "swap" doesn't make sense.
 262  260           */
 263  261          vfs_setresource(vfsp, "swap", 0);
 264  262  
 265  263          /*
 266  264           * now look for options we understand...
 267  265           */
 268  266  
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
 269  267          /* tmpfs doesn't support read-only mounts */
 270  268          if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
 271  269                  error = EINVAL;
 272  270                  goto out;
 273  271          }
 274  272  
 275  273          /*
 276  274           * tm_anonmax is set according to the mount arguments
 277  275           * if any.  Otherwise, it is set to a maximum value.
 278  276           */
 279      -        if (vfs_optionisset(vfsp, "size", &sizestr)) {
 280      -                if ((error = tmp_convnum(sizestr, &anonmax)) != 0)
      277 +        if (vfs_optionisset(vfsp, "size", &argstr)) {
      278 +                if ((error = tmp_convnum(argstr, &anonmax)) != 0)
 281  279                          goto out;
 282  280          } else {
 283  281                  anonmax = ULONG_MAX;
 284  282          }
 285  283  
      284 +        /*
      285 +         * The "mode" mount argument allows the operator to override the
      286 +         * permissions of the root of the tmpfs mount.
      287 +         */
      288 +        if (vfs_optionisset(vfsp, "mode", &argstr)) {
      289 +                if ((error = tmp_convmode(argstr, &root_mode)) != 0) {
      290 +                        goto out;
      291 +                }
      292 +                mode_arg = B_TRUE;
      293 +        }
      294 +
 286  295          if (error = pn_get(uap->dir,
 287  296              (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
 288  297                  goto out;
 289  298  
 290  299          if (uap->flags & MS_REMOUNT) {
 291  300                  tm = (struct tmount *)VFSTOTM(vfsp);
 292  301  
 293  302                  /*
 294  303                   * If we change the size so it's less than what is currently
 295  304                   * being used, we allow that. The file system will simply be
 296  305                   * full until enough files have been removed to get below the
 297  306                   * new max.
 298  307                   */
 299  308                  mutex_enter(&tm->tm_contents);
 300  309                  tm->tm_anonmax = anonmax;
 301  310                  mutex_exit(&tm->tm_contents);
 302  311                  goto out;
 303  312          }
 304  313  
 305  314          if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
 306  315                  pn_free(&dpn);
 307  316                  error = ENOMEM;
 308  317                  goto out;
 309  318          }
 310  319  
 311  320          /*
 312  321           * find an available minor device number for this mount
 313  322           */
 314  323          mutex_enter(&tmpfs_minor_lock);
 315  324          do {
 316  325                  tmpfs_minor = (tmpfs_minor + 1) & L_MAXMIN32;
 317  326                  tm->tm_dev = makedevice(tmpfs_major, tmpfs_minor);
 318  327          } while (vfs_devismounted(tm->tm_dev));
 319  328          mutex_exit(&tmpfs_minor_lock);
 320  329  
 321  330          /*
 322  331           * Set but don't bother entering the mutex
 323  332           * (tmount not on mount list yet)
 324  333           */
 325  334          mutex_init(&tm->tm_contents, NULL, MUTEX_DEFAULT, NULL);
 326  335          mutex_init(&tm->tm_renamelck, NULL, MUTEX_DEFAULT, NULL);
 327  336  
 328  337          tm->tm_vfsp = vfsp;
 329  338          tm->tm_anonmax = anonmax;
 330  339  
 331  340          vfsp->vfs_data = (caddr_t)tm;
 332  341          vfsp->vfs_fstype = tmpfsfstype;
 333  342          vfsp->vfs_dev = tm->tm_dev;
  
    | 
      ↓ open down ↓ | 
    38 lines elided | 
    
      ↑ open up ↑ | 
  
 334  343          vfsp->vfs_bsize = PAGESIZE;
 335  344          vfsp->vfs_flag |= VFS_NOTRUNC;
 336  345          vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
 337  346          tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
 338  347          (void) strcpy(tm->tm_mntpath, dpn.pn_path);
 339  348  
 340  349          /*
 341  350           * allocate and initialize root tmpnode structure
 342  351           */
 343  352          bzero(&rattr, sizeof (struct vattr));
 344      -        rattr.va_mode = (mode_t)(S_IFDIR | 0777);       /* XXX modes */
      353 +        rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
 345  354          rattr.va_type = VDIR;
 346  355          rattr.va_rdev = 0;
 347  356          tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
 348  357          tmpnode_init(tm, tp, &rattr, cr);
 349  358  
 350  359          /*
 351  360           * Get the mode, uid, and gid from the underlying mount point.
 352  361           */
 353  362          rattr.va_mask = AT_MODE|AT_UID|AT_GID;  /* Hint to getattr */
 354  363          got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
 355  364  
 356  365          rw_enter(&tp->tn_rwlock, RW_WRITER);
 357  366          TNTOV(tp)->v_flag |= VROOT;
 358  367  
 359  368          /*
 360  369           * If the getattr succeeded, use its results.  Otherwise allow
 361  370           * the previously set hardwired defaults to prevail.
 362  371           */
 363  372          if (got_attrs == 0) {
 364      -                tp->tn_mode = rattr.va_mode;
      373 +                if (!mode_arg) {
      374 +                        /*
      375 +                         * Only use the underlying mount point for the
      376 +                         * mode if the "mode" mount argument was not
      377 +                         * provided.
      378 +                         */
      379 +                        tp->tn_mode = rattr.va_mode;
      380 +                }
 365  381                  tp->tn_uid = rattr.va_uid;
 366  382                  tp->tn_gid = rattr.va_gid;
 367  383          }
 368  384  
 369  385          /*
 370  386           * initialize linked list of tmpnodes so that the back pointer of
 371  387           * the root tmpnode always points to the last one on the list
 372  388           * and the forward pointer of the last node is null
 373  389           */
 374  390          tp->tn_back = tp;
 375  391          tp->tn_forw = NULL;
 376  392          tp->tn_nlink = 0;
 377  393          tm->tm_rootnode = tp;
 378  394  
 379  395          tdirinit(tp, tp);
 380  396  
 381  397          rw_exit(&tp->tn_rwlock);
 382  398  
 383  399          pn_free(&dpn);
 384  400          error = 0;
 385  401  
 386  402  out:
 387  403          if (error == 0)
 388  404                  vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 389  405  
 390  406          return (error);
 391  407  }
 392  408  
 393  409  static int
 394  410  tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
 395  411  {
 396  412          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 397  413          struct tmpnode *tnp, *cancel;
 398  414          struct vnode    *vp;
 399  415          int error;
 400  416  
 401  417          if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
 402  418                  return (error);
 403  419  
 404  420          /*
 405  421           * Forced unmount is not supported by this file system,
 406  422           * so ENOTSUP is returned.
 407  423           */
 408  424          if (flag & MS_FORCE)
 409  425                  return (ENOTSUP);
 410  426  
 411  427          mutex_enter(&tm->tm_contents);
 412  428  
 413  429          /*
 414  430           * If there are no open files, only the root node should have
 415  431           * a reference count.
 416  432           * With tm_contents held, nothing can be added or removed.
 417  433           * There may be some dirty pages.  To prevent fsflush from
 418  434           * disrupting the unmount, put a hold on each node while scanning.
 419  435           * If we find a previously referenced node, undo the holds we have
 420  436           * placed and fail EBUSY.
 421  437           */
 422  438          tnp = tm->tm_rootnode;
 423  439          if (TNTOV(tnp)->v_count > 1) {
 424  440                  mutex_exit(&tm->tm_contents);
 425  441                  return (EBUSY);
 426  442          }
 427  443  
 428  444          for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
 429  445                  if ((vp = TNTOV(tnp))->v_count > 0) {
 430  446                          cancel = tm->tm_rootnode->tn_forw;
 431  447                          while (cancel != tnp) {
 432  448                                  vp = TNTOV(cancel);
 433  449                                  ASSERT(vp->v_count > 0);
 434  450                                  VN_RELE(vp);
 435  451                                  cancel = cancel->tn_forw;
 436  452                          }
 437  453                          mutex_exit(&tm->tm_contents);
 438  454                          return (EBUSY);
 439  455                  }
 440  456                  VN_HOLD(vp);
 441  457          }
 442  458  
 443  459          /*
 444  460           * We can drop the mutex now because no one can find this mount
 445  461           */
 446  462          mutex_exit(&tm->tm_contents);
 447  463  
 448  464          /*
 449  465           * Free all kmemalloc'd and anonalloc'd memory associated with
 450  466           * this filesystem.  To do this, we go through the file list twice,
 451  467           * once to remove all the directory entries, and then to remove
 452  468           * all the files.  We do this because there is useful code in
 453  469           * tmpnode_free which assumes that the directory entry has been
 454  470           * removed before the file.
 455  471           */
 456  472          /*
 457  473           * Remove all directory entries
 458  474           */
 459  475          for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) {
 460  476                  rw_enter(&tnp->tn_rwlock, RW_WRITER);
 461  477                  if (tnp->tn_type == VDIR)
 462  478                          tdirtrunc(tnp);
 463  479                  if (tnp->tn_vnode->v_flag & V_XATTRDIR) {
 464  480                          /*
 465  481                           * Account for implicit attrdir reference.
 466  482                           */
 467  483                          ASSERT(tnp->tn_nlink > 0);
 468  484                          DECR_COUNT(&tnp->tn_nlink, &tnp->tn_tlock);
 469  485                  }
 470  486                  rw_exit(&tnp->tn_rwlock);
 471  487          }
 472  488  
 473  489          ASSERT(tm->tm_rootnode);
 474  490  
 475  491          /*
 476  492           * All links are gone, v_count is keeping nodes in place.
 477  493           * VN_RELE should make the node disappear, unless somebody
 478  494           * is holding pages against it.  Nap and retry until it disappears.
 479  495           *
 480  496           * We re-acquire the lock to prevent others who have a HOLD on
 481  497           * a tmpnode via its pages or anon slots from blowing it away
 482  498           * (in tmp_inactive) while we're trying to get to it here. Once
 483  499           * we have a HOLD on it we know it'll stick around.
 484  500           *
 485  501           */
 486  502          mutex_enter(&tm->tm_contents);
 487  503          /*
 488  504           * Remove all the files (except the rootnode) backwards.
 489  505           */
 490  506          while ((tnp = tm->tm_rootnode->tn_back) != tm->tm_rootnode) {
 491  507                  mutex_exit(&tm->tm_contents);
 492  508                  /*
 493  509                   * Inhibit tmp_inactive from touching attribute directory
 494  510                   * as all nodes will be released here.
 495  511                   * Note we handled the link count in pass 2 above.
 496  512                   */
 497  513                  rw_enter(&tnp->tn_rwlock, RW_WRITER);
 498  514                  tnp->tn_xattrdp = NULL;
 499  515                  rw_exit(&tnp->tn_rwlock);
 500  516                  vp = TNTOV(tnp);
 501  517                  VN_RELE(vp);
 502  518                  mutex_enter(&tm->tm_contents);
 503  519                  /*
 504  520                   * It's still there after the RELE. Someone else like pageout
 505  521                   * has a hold on it so wait a bit and then try again - we know
 506  522                   * they'll give it up soon.
 507  523                   */
 508  524                  if (tnp == tm->tm_rootnode->tn_back) {
 509  525                          VN_HOLD(vp);
 510  526                          mutex_exit(&tm->tm_contents);
 511  527                          delay(hz / 4);
 512  528                          mutex_enter(&tm->tm_contents);
 513  529                  }
 514  530          }
 515  531          mutex_exit(&tm->tm_contents);
 516  532  
 517  533          tm->tm_rootnode->tn_xattrdp = NULL;
 518  534          VN_RELE(TNTOV(tm->tm_rootnode));
 519  535  
 520  536          ASSERT(tm->tm_mntpath);
 521  537  
 522  538          tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
 523  539  
 524  540          ASSERT(tm->tm_anonmem == 0);
 525  541  
 526  542          mutex_destroy(&tm->tm_contents);
 527  543          mutex_destroy(&tm->tm_renamelck);
 528  544          tmp_memfree(tm, sizeof (struct tmount));
 529  545  
 530  546          return (0);
 531  547  }
 532  548  
 533  549  /*
 534  550   * return a held vnode for the root tmpnode of the given vfs
 535  551   */
 536  552  static int
 537  553  tmp_root(struct vfs *vfsp, struct vnode **vpp)
 538  554  {
 539  555          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 540  556          struct tmpnode *tp = tm->tm_rootnode;
 541  557          struct vnode *vp;
 542  558  
 543  559          ASSERT(tp);
 544  560  
 545  561          vp = TNTOV(tp);
 546  562          VN_HOLD(vp);
 547  563          *vpp = vp;
 548  564          return (0);
 549  565  }
 550  566  
 551  567  static int
 552  568  tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
 553  569  {
 554  570          struct tmount   *tm = (struct tmount *)VFSTOTM(vfsp);
 555  571          ulong_t blocks;
 556  572          dev32_t d32;
 557  573          zoneid_t eff_zid;
 558  574          struct zone *zp;
 559  575  
 560  576          /*
 561  577           * The file system may have been mounted by the global zone on
 562  578           * behalf of the non-global zone.  In that case, the tmount zone_id
 563  579           * will be the global zone.  We still want to show the swap cap inside
 564  580           * the zone in this case, even though the file system was mounted by
 565  581           * the global zone.
 566  582           */
 567  583          if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
 568  584                  zp = curproc->p_zone;
 569  585          else
 570  586                  zp = tm->tm_vfsp->vfs_zone;
 571  587  
 572  588          if (zp == NULL)
 573  589                  eff_zid = GLOBAL_ZONEUNIQID;
 574  590          else
 575  591                  eff_zid = zp->zone_id;
 576  592  
 577  593          sbp->f_bsize = PAGESIZE;
 578  594          sbp->f_frsize = PAGESIZE;
 579  595  
 580  596          /*
 581  597           * Find the amount of available physical and memory swap
 582  598           */
 583  599          mutex_enter(&anoninfo_lock);
 584  600          ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
 585  601          blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
 586  602          mutex_exit(&anoninfo_lock);
 587  603  
 588  604          /*
 589  605           * If tm_anonmax for this mount is less than the available swap space
 590  606           * (minus the amount tmpfs can't use), use that instead
 591  607           */
 592  608          if (blocks > tmpfs_minfree)
 593  609                  sbp->f_bfree = MIN(blocks - tmpfs_minfree,
 594  610                      tm->tm_anonmax - tm->tm_anonmem);
 595  611          else
 596  612                  sbp->f_bfree = 0;
 597  613  
 598  614          sbp->f_bavail = sbp->f_bfree;
 599  615  
 600  616          /*
 601  617           * Total number of blocks is what's available plus what's been used
 602  618           */
 603  619          sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem);
 604  620  
 605  621          if (eff_zid != GLOBAL_ZONEUNIQID &&
 606  622              zp->zone_max_swap_ctl != UINT64_MAX) {
 607  623                  /*
 608  624                   * If the fs is used by a non-global zone with a swap cap,
 609  625                   * then report the capped size.
 610  626                   */
 611  627                  rctl_qty_t cap, used;
 612  628                  pgcnt_t pgcap, pgused;
 613  629  
 614  630                  mutex_enter(&zp->zone_mem_lock);
 615  631                  cap = zp->zone_max_swap_ctl;
 616  632                  used = zp->zone_max_swap;
 617  633                  mutex_exit(&zp->zone_mem_lock);
 618  634  
 619  635                  pgcap = btop(cap);
 620  636                  pgused = btop(used);
 621  637  
 622  638                  sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
 623  639                  sbp->f_bavail = sbp->f_bfree;
 624  640                  sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
 625  641          }
 626  642  
 627  643          /*
 628  644           * The maximum number of files available is approximately the number
 629  645           * of tmpnodes we can allocate from the remaining kernel memory
 630  646           * available to tmpfs.  This is fairly inaccurate since it doesn't
 631  647           * take into account the names stored in the directory entries.
 632  648           */
 633  649          if (tmpfs_maxkmem > tmp_kmemspace)
 634  650                  sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
 635  651                      (sizeof (struct tmpnode) + sizeof (struct tdirent));
 636  652          else
 637  653                  sbp->f_ffree = 0;
 638  654  
 639  655          sbp->f_files = tmpfs_maxkmem /
 640  656              (sizeof (struct tmpnode) + sizeof (struct tdirent));
 641  657          sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
 642  658          (void) cmpldev(&d32, vfsp->vfs_dev);
 643  659          sbp->f_fsid = d32;
 644  660          (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name);
 645  661          (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));
 646  662          /*
 647  663           * ensure null termination
 648  664           */
 649  665          sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
 650  666          sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
 651  667          sbp->f_namemax = MAXNAMELEN - 1;
 652  668          return (0);
 653  669  }
 654  670  
 655  671  static int
 656  672  tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
 657  673  {
 658  674          struct tfid *tfid;
 659  675          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 660  676          struct tmpnode *tp = NULL;
 661  677  
 662  678          tfid = (struct tfid *)fidp;
 663  679          *vpp = NULL;
 664  680  
 665  681          mutex_enter(&tm->tm_contents);
 666  682          for (tp = tm->tm_rootnode; tp; tp = tp->tn_forw) {
 667  683                  mutex_enter(&tp->tn_tlock);
 668  684                  if (tp->tn_nodeid == tfid->tfid_ino) {
 669  685                          /*
 670  686                           * If the gen numbers don't match we know the
 671  687                           * file won't be found since only one tmpnode
 672  688                           * can have this number at a time.
 673  689                           */
 674  690                          if (tp->tn_gen != tfid->tfid_gen || tp->tn_nlink == 0) {
 675  691                                  mutex_exit(&tp->tn_tlock);
 676  692                                  mutex_exit(&tm->tm_contents);
 677  693                                  return (0);
 678  694                          }
 679  695                          *vpp = (struct vnode *)TNTOV(tp);
 680  696  
 681  697                          VN_HOLD(*vpp);
 682  698  
 683  699                          if ((tp->tn_mode & S_ISVTX) &&
 684  700                              !(tp->tn_mode & (S_IXUSR | S_IFDIR))) {
 685  701                                  mutex_enter(&(*vpp)->v_lock);
 686  702                                  (*vpp)->v_flag |= VISSWAP;
 687  703                                  mutex_exit(&(*vpp)->v_lock);
 688  704                          }
 689  705                          mutex_exit(&tp->tn_tlock);
 690  706                          mutex_exit(&tm->tm_contents);
 691  707                          return (0);
 692  708                  }
 693  709                  mutex_exit(&tp->tn_tlock);
 694  710          }
 695  711          mutex_exit(&tm->tm_contents);
 696  712          return (0);
 697  713  }
  
    | 
      ↓ open down ↓ | 
    323 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX