big-one Wdiff usr/src/uts/common/fs/tmpfs/tmp_vfsops.c

Print this page

re #13613 rb4516 Tunables needs volatile keyword

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
          +++ new/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *

↓ open down ↓

12 lines elided

↑ open up ↑

  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  23   24   * Copyright 2015 Joyent, Inc.
  24   25   */
  25   26  
  26   27  #include <sys/types.h>
  27   28  #include <sys/param.h>
  28   29  #include <sys/sysmacros.h>
  29   30  #include <sys/kmem.h>
  30   31  #include <sys/time.h>
  31   32  #include <sys/pathname.h>
  32   33  #include <sys/vfs.h>

  33   34  #include <sys/vfs_opreg.h>
  34   35  #include <sys/vnode.h>
  35   36  #include <sys/stat.h>
  36   37  #include <sys/uio.h>
  37   38  #include <sys/stat.h>
  38   39  #include <sys/errno.h>
  39   40  #include <sys/cmn_err.h>
  40   41  #include <sys/cred.h>
  41   42  #include <sys/statvfs.h>
  42   43  #include <sys/mount.h>
  43   44  #include <sys/debug.h>
  44   45  #include <sys/systm.h>
  45   46  #include <sys/mntent.h>
  46   47  #include <fs/fs_subr.h>
  47   48  #include <vm/page.h>
  48   49  #include <vm/anon.h>
  49   50  #include <sys/model.h>
  50   51  #include <sys/policy.h>
  51   52  
  52   53  #include <sys/fs/swapnode.h>
  53   54  #include <sys/fs/tmp.h>
  54   55  #include <sys/fs/tmpnode.h>
  55   56  
  56   57  static int tmpfsfstype;
  57   58  
  58   59  /*
  59   60   * tmpfs vfs operations.
  60   61   */
  61   62  static int tmpfsinit(int, char *);
  62   63  static int tmp_mount(struct vfs *, struct vnode *,
  63   64          struct mounta *, struct cred *);
  64   65  static int tmp_unmount(struct vfs *, int, struct cred *);
  65   66  static int tmp_root(struct vfs *, struct vnode **);
  66   67  static int tmp_statvfs(struct vfs *, struct statvfs64 *);
  67   68  static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
  68   69  
  69   70  /*
  70   71   * Loadable module wrapper
  71   72   */
  72   73  #include <sys/modctl.h>
  73   74  
  74   75  static mntopts_t tmpfs_proto_opttbl;
  75   76  
  76   77  static vfsdef_t vfw = {
  77   78          VFSDEF_VERSION,
  78   79          "tmpfs",
  79   80          tmpfsinit,
  80   81          VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
  81   82          &tmpfs_proto_opttbl
  82   83  };
  83   84  
  84   85  /*
  85   86   * in-kernel mnttab options
  86   87   */
  87   88  static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
  88   89  static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
  89   90  
  90   91  static mntopt_t tmpfs_options[] = {
  91   92          /* Option name          Cancel Opt      Arg     Flags           Data */
  92   93          { MNTOPT_XATTR,         xattr_cancel,   NULL,   MO_DEFAULT,     NULL},
  93   94          { MNTOPT_NOXATTR,       noxattr_cancel, NULL,   NULL,           NULL},
  94   95          { "size",               NULL,           "0",    MO_HASVALUE,    NULL},
  95   96          { "mode",               NULL,           NULL,   MO_HASVALUE,    NULL}
  96   97  };
  97   98  
  98   99  
  99  100  static mntopts_t tmpfs_proto_opttbl = {
 100  101          sizeof (tmpfs_options) / sizeof (mntopt_t),
 101  102          tmpfs_options
 102  103  };
 103  104  
 104  105  /*
 105  106   * Module linkage information
 106  107   */
 107  108  static struct modlfs modlfs = {
 108  109          &mod_fsops, "filesystem for tmpfs", &vfw
 109  110  };
 110  111  
 111  112  static struct modlinkage modlinkage = {
 112  113          MODREV_1, &modlfs, NULL
 113  114  };
 114  115  
 115  116  int
 116  117  _init()
 117  118  {
 118  119          return (mod_install(&modlinkage));
 119  120  }
 120  121  
 121  122  int
 122  123  _fini()
 123  124  {
 124  125          int error;
 125  126  
 126  127          error = mod_remove(&modlinkage);
 127  128          if (error)
 128  129                  return (error);
 129  130          /*
 130  131           * Tear down the operations vectors
 131  132           */
 132  133          (void) vfs_freevfsops_by_type(tmpfsfstype);
 133  134          vn_freevnodeops(tmp_vnodeops);
 134  135          return (0);
 135  136  }
 136  137  
 137  138  int
 138  139  _info(struct modinfo *modinfop)
 139  140  {
 140  141          return (mod_info(&modlinkage, modinfop));
 141  142  }
 142  143  
 143  144  /*
 144  145   * The following are patchable variables limiting the amount of system
 145  146   * resources tmpfs can use.
 146  147   *
 147  148   * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
 148  149   * tmpfs can use for its data structures (e.g. tmpnodes, directory entries).
 149  150   * It is not determined by setting a hard limit but rather as a percentage of

↓ open down ↓

117 lines elided

↑ open up ↑

 150  151   * physical memory which is determined when tmpfs is first used in the system.
 151  152   *
 152  153   * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
 153  154   * the rest of the system.  In other words, if the amount of free swap space
 154  155   * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
 155  156   * anon allocations will fail.
 156  157   *
 157  158   * There is also a per mount limit on the amount of swap space
 158  159   * (tmount.tm_anonmax) settable via a mount option.
 159  160   */
 160      -size_t tmpfs_maxkmem = 0;
 161      -size_t tmpfs_minfree = 0;
      161 +volatile size_t tmpfs_maxkmem = 0;
      162 +volatile size_t tmpfs_minfree = 0;
 162  163  size_t tmp_kmemspace;           /* bytes of kernel heap used by all tmpfs */
 163  164  
 164  165  static major_t tmpfs_major;
 165  166  static minor_t tmpfs_minor;
 166  167  static kmutex_t tmpfs_minor_lock;
 167  168  
 168  169  /*
 169  170   * initialize global tmpfs locks and such
 170  171   * called when loading tmpfs module
 171  172   */

 172  173  static int
 173  174  tmpfsinit(int fstype, char *name)
 174  175  {
 175  176          static const fs_operation_def_t tmp_vfsops_template[] = {
 176  177                  VFSNAME_MOUNT,          { .vfs_mount = tmp_mount },
 177  178                  VFSNAME_UNMOUNT,        { .vfs_unmount = tmp_unmount },
 178  179                  VFSNAME_ROOT,           { .vfs_root = tmp_root },
 179  180                  VFSNAME_STATVFS,        { .vfs_statvfs = tmp_statvfs },
 180  181                  VFSNAME_VGET,           { .vfs_vget = tmp_vget },
 181  182                  NULL,                   NULL
 182  183          };
 183  184          int error;
 184  185          extern  void    tmpfs_hash_init();
 185  186  
 186  187          tmpfs_hash_init();
 187  188          tmpfsfstype = fstype;
 188  189          ASSERT(tmpfsfstype != 0);
 189  190  
 190  191          error = vfs_setfsops(fstype, tmp_vfsops_template, NULL);
 191  192          if (error != 0) {
 192  193                  cmn_err(CE_WARN, "tmpfsinit: bad vfs ops template");
 193  194                  return (error);
 194  195          }
 195  196  
 196  197          error = vn_make_ops(name, tmp_vnodeops_template, &tmp_vnodeops);
 197  198          if (error != 0) {
 198  199                  (void) vfs_freevfsops_by_type(fstype);
 199  200                  cmn_err(CE_WARN, "tmpfsinit: bad vnode ops template");
 200  201                  return (error);
 201  202          }
 202  203  
 203  204          /*
 204  205           * tmpfs_minfree doesn't need to be some function of configured
 205  206           * swap space since it really is an absolute limit of swap space
 206  207           * which still allows other processes to execute.
 207  208           */
 208  209          if (tmpfs_minfree == 0) {
 209  210                  /*
 210  211                   * Set if not patched
 211  212                   */
 212  213                  tmpfs_minfree = btopr(TMPMINFREE);
 213  214          }
 214  215  
 215  216          /*
 216  217           * Unless patched, tmpfs may allocate at most 1/TMPMAXFRACKMEM
 217  218           * of kmem_maxavail(), with a floor of PAGESIZE.
 218  219           */
 219  220          if (tmpfs_maxkmem == 0)
 220  221                  tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
 221  222  
 222  223          if ((tmpfs_major = getudev()) == (major_t)-1) {
 223  224                  cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
 224  225                  tmpfs_major = 0;
 225  226          }
 226  227          mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
 227  228          return (0);
 228  229  }
 229  230  
 230  231  static int
 231  232  tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 232  233  {
 233  234          struct tmount *tm = NULL;
 234  235          struct tmpnode *tp;
 235  236          struct pathname dpn;
 236  237          int error;
 237  238          pgcnt_t anonmax;
 238  239          struct vattr rattr;
 239  240          int got_attrs;
 240  241          boolean_t mode_arg = B_FALSE;
 241  242          mode_t root_mode = 0777;
 242  243          char *argstr;
 243  244  
 244  245          if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 245  246                  return (error);
 246  247  
 247  248          if (mvp->v_type != VDIR)
 248  249                  return (ENOTDIR);
 249  250  
 250  251          mutex_enter(&mvp->v_lock);
 251  252          if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
 252  253              (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 253  254                  mutex_exit(&mvp->v_lock);
 254  255                  return (EBUSY);
 255  256          }
 256  257          mutex_exit(&mvp->v_lock);
 257  258  
 258  259          /*
 259  260           * Having the resource be anything but "swap" doesn't make sense.
 260  261           */
 261  262          vfs_setresource(vfsp, "swap", 0);
 262  263  
 263  264          /*
 264  265           * now look for options we understand...
 265  266           */
 266  267  
 267  268          /* tmpfs doesn't support read-only mounts */
 268  269          if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
 269  270                  error = EINVAL;
 270  271                  goto out;
 271  272          }
 272  273  
 273  274          /*
 274  275           * tm_anonmax is set according to the mount arguments
 275  276           * if any.  Otherwise, it is set to a maximum value.
 276  277           */
 277  278          if (vfs_optionisset(vfsp, "size", &argstr)) {
 278  279                  if ((error = tmp_convnum(argstr, &anonmax)) != 0)
 279  280                          goto out;
 280  281          } else {
 281  282                  anonmax = ULONG_MAX;
 282  283          }
 283  284  
 284  285          /*
 285  286           * The "mode" mount argument allows the operator to override the
 286  287           * permissions of the root of the tmpfs mount.
 287  288           */
 288  289          if (vfs_optionisset(vfsp, "mode", &argstr)) {
 289  290                  if ((error = tmp_convmode(argstr, &root_mode)) != 0) {
 290  291                          goto out;
 291  292                  }
 292  293                  mode_arg = B_TRUE;
 293  294          }
 294  295  
 295  296          if (error = pn_get(uap->dir,
 296  297              (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
 297  298                  goto out;
 298  299  
 299  300          if (uap->flags & MS_REMOUNT) {
 300  301                  tm = (struct tmount *)VFSTOTM(vfsp);
 301  302  
 302  303                  /*
 303  304                   * If we change the size so it's less than what is currently
 304  305                   * being used, we allow that. The file system will simply be
 305  306                   * full until enough files have been removed to get below the
 306  307                   * new max.
 307  308                   */
 308  309                  mutex_enter(&tm->tm_contents);
 309  310                  tm->tm_anonmax = anonmax;
 310  311                  mutex_exit(&tm->tm_contents);
 311  312                  goto out;
 312  313          }
 313  314  
 314  315          if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
 315  316                  pn_free(&dpn);
 316  317                  error = ENOMEM;
 317  318                  goto out;
 318  319          }
 319  320  
 320  321          /*
 321  322           * find an available minor device number for this mount
 322  323           */
 323  324          mutex_enter(&tmpfs_minor_lock);
 324  325          do {
 325  326                  tmpfs_minor = (tmpfs_minor + 1) & L_MAXMIN32;
 326  327                  tm->tm_dev = makedevice(tmpfs_major, tmpfs_minor);
 327  328          } while (vfs_devismounted(tm->tm_dev));
 328  329          mutex_exit(&tmpfs_minor_lock);
 329  330  
 330  331          /*
 331  332           * Set but don't bother entering the mutex
 332  333           * (tmount not on mount list yet)
 333  334           */
 334  335          mutex_init(&tm->tm_contents, NULL, MUTEX_DEFAULT, NULL);
 335  336          mutex_init(&tm->tm_renamelck, NULL, MUTEX_DEFAULT, NULL);
 336  337  
 337  338          tm->tm_vfsp = vfsp;
 338  339          tm->tm_anonmax = anonmax;
 339  340  
 340  341          vfsp->vfs_data = (caddr_t)tm;
 341  342          vfsp->vfs_fstype = tmpfsfstype;
 342  343          vfsp->vfs_dev = tm->tm_dev;
 343  344          vfsp->vfs_bsize = PAGESIZE;
 344  345          vfsp->vfs_flag |= VFS_NOTRUNC;
 345  346          vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
 346  347          tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
 347  348          (void) strcpy(tm->tm_mntpath, dpn.pn_path);
 348  349  
 349  350          /*
 350  351           * allocate and initialize root tmpnode structure
 351  352           */
 352  353          bzero(&rattr, sizeof (struct vattr));
 353  354          rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
 354  355          rattr.va_type = VDIR;
 355  356          rattr.va_rdev = 0;
 356  357          tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
 357  358          tmpnode_init(tm, tp, &rattr, cr);
 358  359  
 359  360          /*
 360  361           * Get the mode, uid, and gid from the underlying mount point.
 361  362           */
 362  363          rattr.va_mask = AT_MODE|AT_UID|AT_GID;  /* Hint to getattr */
 363  364          got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
 364  365  
 365  366          rw_enter(&tp->tn_rwlock, RW_WRITER);
 366  367          TNTOV(tp)->v_flag |= VROOT;
 367  368  
 368  369          /*
 369  370           * If the getattr succeeded, use its results.  Otherwise allow
 370  371           * the previously set hardwired defaults to prevail.
 371  372           */
 372  373          if (got_attrs == 0) {
 373  374                  if (!mode_arg) {
 374  375                          /*
 375  376                           * Only use the underlying mount point for the
 376  377                           * mode if the "mode" mount argument was not
 377  378                           * provided.
 378  379                           */
 379  380                          tp->tn_mode = rattr.va_mode;
 380  381                  }
 381  382                  tp->tn_uid = rattr.va_uid;
 382  383                  tp->tn_gid = rattr.va_gid;
 383  384          }
 384  385  
 385  386          /*
 386  387           * initialize linked list of tmpnodes so that the back pointer of
 387  388           * the root tmpnode always points to the last one on the list
 388  389           * and the forward pointer of the last node is null
 389  390           */
 390  391          tp->tn_back = tp;
 391  392          tp->tn_forw = NULL;
 392  393          tp->tn_nlink = 0;
 393  394          tm->tm_rootnode = tp;
 394  395  
 395  396          tdirinit(tp, tp);
 396  397  
 397  398          rw_exit(&tp->tn_rwlock);
 398  399  
 399  400          pn_free(&dpn);
 400  401          error = 0;
 401  402  
 402  403  out:
 403  404          if (error == 0)
 404  405                  vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 405  406  
 406  407          return (error);
 407  408  }
 408  409  
 409  410  static int
 410  411  tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
 411  412  {
 412  413          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 413  414          struct tmpnode *tnp, *cancel;
 414  415          struct vnode    *vp;
 415  416          int error;
 416  417  
 417  418          if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
 418  419                  return (error);
 419  420  
 420  421          /*
 421  422           * Forced unmount is not supported by this file system,
 422  423           * so ENOTSUP is returned.
 423  424           */
 424  425          if (flag & MS_FORCE)
 425  426                  return (ENOTSUP);
 426  427  
 427  428          mutex_enter(&tm->tm_contents);
 428  429  
 429  430          /*
 430  431           * If there are no open files, only the root node should have
 431  432           * a reference count.
 432  433           * With tm_contents held, nothing can be added or removed.
 433  434           * There may be some dirty pages.  To prevent fsflush from
 434  435           * disrupting the unmount, put a hold on each node while scanning.
 435  436           * If we find a previously referenced node, undo the holds we have
 436  437           * placed and fail EBUSY.
 437  438           */
 438  439          tnp = tm->tm_rootnode;
 439  440          if (TNTOV(tnp)->v_count > 1) {
 440  441                  mutex_exit(&tm->tm_contents);
 441  442                  return (EBUSY);
 442  443          }
 443  444  
 444  445          for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
 445  446                  if ((vp = TNTOV(tnp))->v_count > 0) {
 446  447                          cancel = tm->tm_rootnode->tn_forw;
 447  448                          while (cancel != tnp) {
 448  449                                  vp = TNTOV(cancel);
 449  450                                  ASSERT(vp->v_count > 0);
 450  451                                  VN_RELE(vp);
 451  452                                  cancel = cancel->tn_forw;
 452  453                          }
 453  454                          mutex_exit(&tm->tm_contents);
 454  455                          return (EBUSY);
 455  456                  }
 456  457                  VN_HOLD(vp);
 457  458          }
 458  459  
 459  460          /*
 460  461           * We can drop the mutex now because no one can find this mount
 461  462           */
 462  463          mutex_exit(&tm->tm_contents);
 463  464  
 464  465          /*
 465  466           * Free all kmemalloc'd and anonalloc'd memory associated with
 466  467           * this filesystem.  To do this, we go through the file list twice,
 467  468           * once to remove all the directory entries, and then to remove
 468  469           * all the files.  We do this because there is useful code in
 469  470           * tmpnode_free which assumes that the directory entry has been
 470  471           * removed before the file.
 471  472           */
 472  473          /*
 473  474           * Remove all directory entries
 474  475           */
 475  476          for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) {
 476  477                  rw_enter(&tnp->tn_rwlock, RW_WRITER);
 477  478                  if (tnp->tn_type == VDIR)
 478  479                          tdirtrunc(tnp);
 479  480                  if (tnp->tn_vnode->v_flag & V_XATTRDIR) {
 480  481                          /*
 481  482                           * Account for implicit attrdir reference.
 482  483                           */
 483  484                          ASSERT(tnp->tn_nlink > 0);
 484  485                          DECR_COUNT(&tnp->tn_nlink, &tnp->tn_tlock);
 485  486                  }
 486  487                  rw_exit(&tnp->tn_rwlock);
 487  488          }
 488  489  
 489  490          ASSERT(tm->tm_rootnode);
 490  491  
 491  492          /*
 492  493           * All links are gone, v_count is keeping nodes in place.
 493  494           * VN_RELE should make the node disappear, unless somebody
 494  495           * is holding pages against it.  Nap and retry until it disappears.
 495  496           *
 496  497           * We re-acquire the lock to prevent others who have a HOLD on
 497  498           * a tmpnode via its pages or anon slots from blowing it away
 498  499           * (in tmp_inactive) while we're trying to get to it here. Once
 499  500           * we have a HOLD on it we know it'll stick around.
 500  501           *
 501  502           */
 502  503          mutex_enter(&tm->tm_contents);
 503  504          /*
 504  505           * Remove all the files (except the rootnode) backwards.
 505  506           */
 506  507          while ((tnp = tm->tm_rootnode->tn_back) != tm->tm_rootnode) {
 507  508                  mutex_exit(&tm->tm_contents);
 508  509                  /*
 509  510                   * Inhibit tmp_inactive from touching attribute directory
 510  511                   * as all nodes will be released here.
 511  512                   * Note we handled the link count in pass 2 above.
 512  513                   */
 513  514                  rw_enter(&tnp->tn_rwlock, RW_WRITER);
 514  515                  tnp->tn_xattrdp = NULL;
 515  516                  rw_exit(&tnp->tn_rwlock);
 516  517                  vp = TNTOV(tnp);
 517  518                  VN_RELE(vp);
 518  519                  mutex_enter(&tm->tm_contents);
 519  520                  /*
 520  521                   * It's still there after the RELE. Someone else like pageout
 521  522                   * has a hold on it so wait a bit and then try again - we know
 522  523                   * they'll give it up soon.
 523  524                   */
 524  525                  if (tnp == tm->tm_rootnode->tn_back) {
 525  526                          VN_HOLD(vp);
 526  527                          mutex_exit(&tm->tm_contents);
 527  528                          delay(hz / 4);
 528  529                          mutex_enter(&tm->tm_contents);
 529  530                  }
 530  531          }
 531  532          mutex_exit(&tm->tm_contents);
 532  533  
 533  534          tm->tm_rootnode->tn_xattrdp = NULL;
 534  535          VN_RELE(TNTOV(tm->tm_rootnode));
 535  536  
 536  537          ASSERT(tm->tm_mntpath);
 537  538  
 538  539          tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
 539  540  
 540  541          ASSERT(tm->tm_anonmem == 0);
 541  542  
 542  543          mutex_destroy(&tm->tm_contents);
 543  544          mutex_destroy(&tm->tm_renamelck);
 544  545          tmp_memfree(tm, sizeof (struct tmount));
 545  546  
 546  547          return (0);
 547  548  }
 548  549  
 549  550  /*
 550  551   * return a held reference to the root vnode of the given vfs
 551  552   */
 552  553  static int
 553  554  tmp_root(struct vfs *vfsp, struct vnode **vpp)
 554  555  {
 555  556          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 556  557          struct tmpnode *tp = tm->tm_rootnode;
 557  558          struct vnode *vp;
 558  559  
 559  560          ASSERT(tp);
 560  561  
 561  562          vp = TNTOV(tp);
 562  563          VN_HOLD(vp);
 563  564          *vpp = vp;
 564  565          return (0);
 565  566  }
 566  567  
 567  568  static int
 568  569  tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
 569  570  {
 570  571          struct tmount   *tm = (struct tmount *)VFSTOTM(vfsp);
 571  572          ulong_t blocks;
 572  573          dev32_t d32;
 573  574          zoneid_t eff_zid;
 574  575          struct zone *zp;
 575  576  
 576  577          /*
 577  578           * The file system may have been mounted by the global zone on
 578  579           * behalf of the non-global zone.  In that case, the tmount zone_id
 579  580           * will be the global zone.  We still want to show the swap cap inside
 580  581           * the zone in this case, even though the file system was mounted by
 581  582           * the global zone.
 582  583           */
 583  584          if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
 584  585                  zp = curproc->p_zone;
 585  586          else
 586  587                  zp = tm->tm_vfsp->vfs_zone;
 587  588  
 588  589          if (zp == NULL)
 589  590                  eff_zid = GLOBAL_ZONEUNIQID;
 590  591          else
 591  592                  eff_zid = zp->zone_id;
 592  593  
 593  594          sbp->f_bsize = PAGESIZE;
 594  595          sbp->f_frsize = PAGESIZE;
 595  596  
 596  597          /*
 597  598           * Find the amount of available physical and memory swap
 598  599           */
 599  600          mutex_enter(&anoninfo_lock);
 600  601          ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
 601  602          blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
 602  603          mutex_exit(&anoninfo_lock);
 603  604  
 604  605          /*
 605  606           * If tm_anonmax for this mount is less than the available swap space
 606  607           * (minus the amount tmpfs can't use), use that instead
 607  608           */
 608  609          if (blocks > tmpfs_minfree)
 609  610                  sbp->f_bfree = MIN(blocks - tmpfs_minfree,
 610  611                      tm->tm_anonmax - tm->tm_anonmem);
 611  612          else
 612  613                  sbp->f_bfree = 0;
 613  614  
 614  615          sbp->f_bavail = sbp->f_bfree;
 615  616  
 616  617          /*
 617  618           * Total number of blocks is what's available plus what's been used
 618  619           */
 619  620          sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem);
 620  621  
 621  622          if (eff_zid != GLOBAL_ZONEUNIQID &&
 622  623              zp->zone_max_swap_ctl != UINT64_MAX) {
 623  624                  /*
 624  625                   * If the fs is used by a non-global zone with a swap cap,
 625  626                   * then report the capped size.
 626  627                   */
 627  628                  rctl_qty_t cap, used;
 628  629                  pgcnt_t pgcap, pgused;
 629  630  
 630  631                  mutex_enter(&zp->zone_mem_lock);
 631  632                  cap = zp->zone_max_swap_ctl;
 632  633                  used = zp->zone_max_swap;
 633  634                  mutex_exit(&zp->zone_mem_lock);
 634  635  
 635  636                  pgcap = btop(cap);
 636  637                  pgused = btop(used);
 637  638  
 638  639                  sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
 639  640                  sbp->f_bavail = sbp->f_bfree;
 640  641                  sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
 641  642          }
 642  643  
 643  644          /*
 644  645           * The maximum number of files available is approximately the number
 645  646           * of tmpnodes we can allocate from the remaining kernel memory
 646  647           * available to tmpfs.  This is fairly inaccurate since it doesn't
 647  648           * take into account the names stored in the directory entries.
 648  649           */
 649  650          if (tmpfs_maxkmem > tmp_kmemspace)
 650  651                  sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
 651  652                      (sizeof (struct tmpnode) + sizeof (struct tdirent));
 652  653          else
 653  654                  sbp->f_ffree = 0;
 654  655  
 655  656          sbp->f_files = tmpfs_maxkmem /
 656  657              (sizeof (struct tmpnode) + sizeof (struct tdirent));
 657  658          sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
 658  659          (void) cmpldev(&d32, vfsp->vfs_dev);
 659  660          sbp->f_fsid = d32;
 660  661          (void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name);
 661  662          (void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));
 662  663          /*
 663  664           * ensure null termination
 664  665           */
 665  666          sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
 666  667          sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
 667  668          sbp->f_namemax = MAXNAMELEN - 1;
 668  669          return (0);
 669  670  }
 670  671  
 671  672  static int
 672  673  tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
 673  674  {
 674  675          struct tfid *tfid;
 675  676          struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
 676  677          struct tmpnode *tp = NULL;
 677  678  
 678  679          tfid = (struct tfid *)fidp;
 679  680          *vpp = NULL;
 680  681  
 681  682          mutex_enter(&tm->tm_contents);
 682  683          for (tp = tm->tm_rootnode; tp; tp = tp->tn_forw) {
 683  684                  mutex_enter(&tp->tn_tlock);
 684  685                  if (tp->tn_nodeid == tfid->tfid_ino) {
 685  686                          /*
 686  687                           * If the gen numbers don't match we know the
 687  688                           * file won't be found since only one tmpnode
 688  689                           * can have this number at a time.
 689  690                           */
 690  691                          if (tp->tn_gen != tfid->tfid_gen || tp->tn_nlink == 0) {
 691  692                                  mutex_exit(&tp->tn_tlock);
 692  693                                  mutex_exit(&tm->tm_contents);
 693  694                                  return (0);
 694  695                          }
 695  696                          *vpp = (struct vnode *)TNTOV(tp);
 696  697  
 697  698                          VN_HOLD(*vpp);
 698  699  
 699  700                          if ((tp->tn_mode & S_ISVTX) &&
 700  701                              !(tp->tn_mode & (S_IXUSR | S_IFDIR))) {
 701  702                                  mutex_enter(&(*vpp)->v_lock);
 702  703                                  (*vpp)->v_flag |= VISSWAP;
 703  704                                  mutex_exit(&(*vpp)->v_lock);
 704  705                          }
 705  706                          mutex_exit(&tp->tn_tlock);
 706  707                          mutex_exit(&tm->tm_contents);
 707  708                          return (0);
 708  709                  }
 709  710                  mutex_exit(&tp->tn_tlock);
 710  711          }
 711  712          mutex_exit(&tm->tm_contents);
 712  713          return (0);
 713  714  }

↓ open down ↓

542 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX