re #13613 rb4516 Tunables needs volatile keyword
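
In the portion of the webrev reproduced below, the only code change to ufs_vfsops.c is the removal of the file-local "extern int maxphys;" declaration (plus an added Nexenta copyright notice). That is consistent with the review title: once a tunable such as maxphys is declared with the volatile qualifier in a shared header, a local extern declaration without the qualifier no longer matches and has to be dropped. The following is a minimal illustrative sketch of that pattern, not part of this diff; the header location is an assumption, and the type simply mirrors the removed local declaration.

    /* Sketch only -- not part of the webrev. */

    /* In a shared header (location assumed for illustration): */
    extern volatile int maxphys;    /* run-time tunable; re-read on every use */

    /*
     * In ufs_vfsops.c the ad-hoc local declaration
     *         extern  int     maxphys;
     * is removed, so the only declaration in scope is the volatile one
     * from the header; a non-volatile local extern would now conflict.
     */

With the volatile qualifier in place, a value set in /etc/system or patched live with mdb -kw is re-read on every access rather than being cached by the compiler across, for example, the mountfs() path.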
    
      
          --- old/usr/src/uts/common/fs/ufs/ufs_vfsops.c
          +++ new/usr/src/uts/common/fs/ufs/ufs_vfsops.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   * Copyright 2016 Nexenta Systems, Inc.
  26   26   * Copyright (c) 2017 by Delphix. All rights reserved.
  27   27   */
       28 +/*
       29 + * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
       30 + */
  28   31  
  29   32  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  30   33  /*        All Rights Reserved   */
  31   34  
  32   35  /*
  33   36   * University Copyright- Copyright (c) 1982, 1986, 1988
  34   37   * The Regents of the University of California
  35   38   * All Rights Reserved
  36   39   *
  37   40   * University Acknowledgment- Portions of this document are derived from
  38   41   * software developed by the University of California, Berkeley, and its
  39   42   * contributors.
  40   43   */
  41   44  
  42   45  #include <sys/types.h>
  43   46  #include <sys/t_lock.h>
  44   47  #include <sys/param.h>
  45   48  #include <sys/systm.h>
  46   49  #include <sys/bitmap.h>
  47   50  #include <sys/sysmacros.h>
  48   51  #include <sys/kmem.h>
  49   52  #include <sys/signal.h>
  50   53  #include <sys/user.h>
  51   54  #include <sys/proc.h>
  52   55  #include <sys/disp.h>
  53   56  #include <sys/buf.h>
  54   57  #include <sys/pathname.h>
  55   58  #include <sys/vfs.h>
  56   59  #include <sys/vfs_opreg.h>
  57   60  #include <sys/vnode.h>
  58   61  #include <sys/file.h>
  59   62  #include <sys/atomic.h>
  60   63  #include <sys/uio.h>
  61   64  #include <sys/dkio.h>
  62   65  #include <sys/cred.h>
  63   66  #include <sys/conf.h>
  64   67  #include <sys/dnlc.h>
  65   68  #include <sys/kstat.h>
  66   69  #include <sys/acl.h>
  67   70  #include <sys/fs/ufs_fsdir.h>
  68   71  #include <sys/fs/ufs_fs.h>
  69   72  #include <sys/fs/ufs_inode.h>
  70   73  #include <sys/fs/ufs_mount.h>
  71   74  #include <sys/fs/ufs_acl.h>
  72   75  #include <sys/fs/ufs_panic.h>
  73   76  #include <sys/fs/ufs_bio.h>
  74   77  #include <sys/fs/ufs_quota.h>
  75   78  #include <sys/fs/ufs_log.h>
  76   79  #undef NFS
  77   80  #include <sys/statvfs.h>
  78   81  #include <sys/mount.h>
  79   82  #include <sys/mntent.h>
  80   83  #include <sys/swap.h>
  81   84  #include <sys/errno.h>
  82   85  #include <sys/debug.h>
  83   86  #include "fs/fs_subr.h"
  84   87  #include <sys/cmn_err.h>
  85   88  #include <sys/dnlc.h>
  86   89  #include <sys/fssnap_if.h>
  87   90  #include <sys/sunddi.h>
  88   91  #include <sys/bootconf.h>
  89   92  #include <sys/policy.h>
  90   93  #include <sys/zone.h>
  91   94  
  92   95  /*
  93   96   * This is the loadable module wrapper.
  94   97   */
  95   98  #include <sys/modctl.h>
  96   99  
  97  100  int                     ufsfstype;
  98  101  vfsops_t                *ufs_vfsops;
  99  102  static int              ufsinit(int, char *);
 100  103  static int              mountfs();
 101  104  extern int              highbit();
 102  105  extern struct instats   ins;
 103  106  extern struct vnode *common_specvp(struct vnode *vp);
 104  107  extern vfs_t            EIO_vfs;
 105  108  
 106  109  struct  dquot *dquot, *dquotNDQUOT;
 107  110  
 108  111  /*
 109  112   * Cylinder group summary information handling tunable.
 110  113   * This defines when these deltas get logged.
 111  114   * If the number of cylinders in the file system is over the
 112  115   * tunable then we log csum updates. Otherwise the updates are only
 113  116   * done for performance on unmount. After a panic they can be
 114  117   * quickly constructed during mounting. See ufs_construct_si()
 115  118   * called from ufs_getsummaryinfo().
 116  119   *
 117  120   * This performance feature can of course be disabled by setting
 118  121   * ufs_ncg_log to 0, and fully enabled by setting it to 0xffffffff.
 119  122   */
 120  123  #define UFS_LOG_NCG_DEFAULT 10000
 121  124  uint32_t ufs_ncg_log = UFS_LOG_NCG_DEFAULT;
 122  125  
 123  126  /*
 124  127   * ufs_clean_root indicates whether the root fs went down cleanly
 125  128   */
 126  129  static int ufs_clean_root = 0;
 127  130  
 128  131  /*
 129  132   * UFS Mount options table
 130  133   */
 131  134  static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
 132  135  static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
 133  136  static char *forcedirectio_cancel[] = { MNTOPT_NOFORCEDIRECTIO, NULL };
 134  137  static char *noforcedirectio_cancel[] = { MNTOPT_FORCEDIRECTIO, NULL };
 135  138  static char *largefiles_cancel[] = { MNTOPT_NOLARGEFILES, NULL };
 136  139  static char *nolargefiles_cancel[] = { MNTOPT_LARGEFILES, NULL };
 137  140  static char *logging_cancel[] = { MNTOPT_NOLOGGING, NULL };
 138  141  static char *nologging_cancel[] = { MNTOPT_LOGGING, NULL };
 139  142  static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
 140  143  static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
 141  144  static char *quota_cancel[] = { MNTOPT_NOQUOTA, NULL };
 142  145  static char *noquota_cancel[] = { MNTOPT_QUOTA, NULL };
 143  146  static char *dfratime_cancel[] = { MNTOPT_NODFRATIME, NULL };
 144  147  static char *nodfratime_cancel[] = { MNTOPT_DFRATIME, NULL };
 145  148  
 146  149  static mntopt_t mntopts[] = {
 147  150  /*
 148  151   *      option name             cancel option   default arg     flags
 149  152   *              ufs arg flag
 150  153   */
 151  154          { MNTOPT_INTR,          intr_cancel,    NULL,           MO_DEFAULT,
 152  155                  (void *)0 },
 153  156          { MNTOPT_NOINTR,        nointr_cancel,  NULL,           0,
 154  157                  (void *)UFSMNT_NOINTR },
 155  158          { MNTOPT_SYNCDIR,       NULL,           NULL,           0,
 156  159                  (void *)UFSMNT_SYNCDIR },
 157  160          { MNTOPT_FORCEDIRECTIO, forcedirectio_cancel, NULL,     0,
 158  161                  (void *)UFSMNT_FORCEDIRECTIO },
 159  162          { MNTOPT_NOFORCEDIRECTIO, noforcedirectio_cancel, NULL, 0,
 160  163                  (void *)UFSMNT_NOFORCEDIRECTIO },
 161  164          { MNTOPT_NOSETSEC,      NULL,           NULL,           0,
 162  165                  (void *)UFSMNT_NOSETSEC },
 163  166          { MNTOPT_LARGEFILES,    largefiles_cancel, NULL,        MO_DEFAULT,
 164  167                  (void *)UFSMNT_LARGEFILES },
 165  168          { MNTOPT_NOLARGEFILES,  nolargefiles_cancel, NULL,      0,
 166  169                  (void *)0 },
 167  170          { MNTOPT_LOGGING,       logging_cancel, NULL,           MO_TAG,
 168  171                  (void *)UFSMNT_LOGGING },
 169  172          { MNTOPT_NOLOGGING,     nologging_cancel, NULL,
 170  173                  MO_NODISPLAY|MO_DEFAULT|MO_TAG, (void *)0 },
 171  174          { MNTOPT_QUOTA,         quota_cancel, NULL,             MO_IGNORE,
 172  175                  (void *)0 },
 173  176          { MNTOPT_NOQUOTA,       noquota_cancel, NULL,
 174  177                  MO_NODISPLAY|MO_DEFAULT, (void *)0 },
 175  178          { MNTOPT_GLOBAL,        NULL,           NULL,           0,
 176  179                  (void *)0 },
 177  180          { MNTOPT_XATTR, xattr_cancel,           NULL,           MO_DEFAULT,
 178  181                  (void *)0 },
 179  182          { MNTOPT_NOXATTR,       noxattr_cancel,         NULL,           0,
 180  183                  (void *)0 },
 181  184          { MNTOPT_NOATIME,       NULL,           NULL,           0,
 182  185                  (void *)UFSMNT_NOATIME },
 183  186          { MNTOPT_DFRATIME,      dfratime_cancel, NULL,          0,
 184  187                  (void *)0 },
 185  188          { MNTOPT_NODFRATIME,    nodfratime_cancel, NULL,
 186  189                  MO_NODISPLAY|MO_DEFAULT, (void *)UFSMNT_NODFRATIME },
 187  190          { MNTOPT_ONERROR,       NULL,           UFSMNT_ONERROR_PANIC_STR,
 188  191                  MO_DEFAULT|MO_HASVALUE, (void *)0 },
 189  192  };
 190  193  
 191  194  static mntopts_t ufs_mntopts = {
 192  195          sizeof (mntopts) / sizeof (mntopt_t),
 193  196          mntopts
 194  197  };
 195  198  
 196  199  static vfsdef_t vfw = {
 197  200          VFSDEF_VERSION,
 198  201          "ufs",
 199  202          ufsinit,
 200  203          VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
 201  204          &ufs_mntopts
 202  205  };
 203  206  
 204  207  /*
 205  208   * Module linkage information for the kernel.
 206  209   */
 207  210  extern struct mod_ops mod_fsops;
 208  211  
 209  212  static struct modlfs modlfs = {
 210  213          &mod_fsops, "filesystem for ufs", &vfw
 211  214  };
 212  215  
 213  216  static struct modlinkage modlinkage = {
 214  217          MODREV_1, (void *)&modlfs, NULL
 215  218  };
 216  219  
 217  220  /*
 218  221   * An attempt has been made to make this module unloadable.  In order to
 219  222   * test it, we need a system in which the root fs is NOT ufs.  THIS HAS NOT
 220  223   * BEEN DONE
 221  224   */
 222  225  
 223  226  extern kstat_t *ufs_inode_kstat;
 224  227  extern uint_t ufs_lockfs_key;
 225  228  extern void ufs_lockfs_tsd_destructor(void *);
 226  229  extern uint_t bypass_snapshot_throttle_key;
 227  230  
 228  231  int
 229  232  _init(void)
 230  233  {
 231  234          /*
 232  235           * Create an index into the per thread array so that any thread doing
 233  236           * VOP will have a lockfs mark on it.
 234  237           */
 235  238          tsd_create(&ufs_lockfs_key, ufs_lockfs_tsd_destructor);
 236  239          tsd_create(&bypass_snapshot_throttle_key, NULL);
 237  240          return (mod_install(&modlinkage));
 238  241  }
 239  242  
 240  243  int
 241  244  _fini(void)
 242  245  {
 243  246          return (EBUSY);
 244  247  }
 245  248  
 246  249  int
 247  250  _info(struct modinfo *modinfop)
 248  251  {
 249  252          return (mod_info(&modlinkage, modinfop));
 250  253  }
 251  254  
 252  255  extern struct vnode *makespecvp(dev_t dev, vtype_t type);
 253  256  
 254  257  extern kmutex_t ufs_scan_lock;
 255  258  
 256  259  static int mountfs(struct vfs *, enum whymountroot, struct vnode *, char *,
 257  260                  struct cred *, int, void *, int);
 258  261  
 259  262  
 260  263  static int
 261  264  ufs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
 262  265      struct cred *cr)
 263  266  {
 264  267          char *data = uap->dataptr;
 265  268          int datalen = uap->datalen;
 266  269          dev_t dev;
 267  270          struct vnode *lvp = NULL;
 268  271          struct vnode *svp = NULL;
 269  272          struct pathname dpn;
 270  273          int error;
 271  274          enum whymountroot why = ROOT_INIT;
 272  275          struct ufs_args args;
 273  276          int oflag, aflag;
 274  277          int fromspace = (uap->flags & MS_SYSSPACE) ?
 275  278              UIO_SYSSPACE : UIO_USERSPACE;
 276  279  
 277  280          if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 278  281                  return (error);
 279  282  
 280  283          if (mvp->v_type != VDIR)
 281  284                  return (ENOTDIR);
 282  285  
 283  286          mutex_enter(&mvp->v_lock);
 284  287          if ((uap->flags & MS_REMOUNT) == 0 &&
 285  288              (uap->flags & MS_OVERLAY) == 0 &&
 286  289              (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 287  290                  mutex_exit(&mvp->v_lock);
 288  291                  return (EBUSY);
 289  292          }
 290  293          mutex_exit(&mvp->v_lock);
 291  294  
 292  295          /*
 293  296           * Get arguments
 294  297           */
 295  298          bzero(&args, sizeof (args));
 296  299          if ((uap->flags & MS_DATA) && data != NULL && datalen != 0) {
 297  300                  int copy_result = 0;
 298  301  
 299  302                  if (datalen > sizeof (args))
 300  303                          return (EINVAL);
 301  304                  if (uap->flags & MS_SYSSPACE)
 302  305                          bcopy(data, &args, datalen);
 303  306                  else
 304  307                          copy_result = copyin(data, &args, datalen);
 305  308                  if (copy_result)
 306  309                          return (EFAULT);
 307  310                  datalen = sizeof (struct ufs_args);
 308  311          } else {
 309  312                  datalen = 0;
 310  313          }
 311  314  
 312  315          if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 313  316              (uap->flags & MS_RDONLY) != 0) {
 314  317                  oflag = FREAD;
 315  318                  aflag = VREAD;
 316  319          } else {
 317  320                  oflag = FREAD | FWRITE;
 318  321                  aflag = VREAD | VWRITE;
 319  322          }
 320  323  
 321  324          /*
 322  325           * Read in the mount point pathname
 323  326           * (so we can record the directory the file system was last mounted on).
 324  327           */
 325  328          if (error = pn_get(uap->dir, fromspace, &dpn))
 326  329                  return (error);
 327  330  
 328  331          /*
 329  332           * Resolve path name of special file being mounted.
 330  333           */
 331  334          if (error = lookupname(uap->spec, fromspace, FOLLOW, NULL, &svp)) {
 332  335                  pn_free(&dpn);
 333  336                  return (error);
 334  337          }
 335  338  
 336  339          error = vfs_get_lofi(vfsp, &lvp);
 337  340  
 338  341          if (error > 0) {
 339  342                  VN_RELE(svp);
 340  343                  pn_free(&dpn);
 341  344                  return (error);
 342  345          } else if (error == 0) {
 343  346                  dev = lvp->v_rdev;
 344  347  
 345  348                  if (getmajor(dev) >= devcnt) {
 346  349                          error = ENXIO;
 347  350                          goto out;
 348  351                  }
 349  352          } else {
 350  353                  dev = svp->v_rdev;
 351  354  
 352  355                  if (svp->v_type != VBLK) {
 353  356                          VN_RELE(svp);
 354  357                          pn_free(&dpn);
 355  358                          return (ENOTBLK);
 356  359                  }
 357  360  
 358  361                  if (getmajor(dev) >= devcnt) {
 359  362                          error = ENXIO;
 360  363                          goto out;
 361  364                  }
 362  365  
 363  366                  /*
 364  367                   * In SunCluster, requests to a global device are
 365  368                   * satisfied by a local device. We substitute the global
 366  369                   * pxfs node with a local spec node here.
 367  370                   */
 368  371                  if (IS_PXFSVP(svp)) {
 369  372                          ASSERT(lvp == NULL);
 370  373                          VN_RELE(svp);
 371  374                          svp = makespecvp(dev, VBLK);
 372  375                  }
 373  376  
 374  377                  if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0) {
 375  378                          VN_RELE(svp);
 376  379                          pn_free(&dpn);
 377  380                          return (error);
 378  381                  }
 379  382          }
 380  383  
 381  384          if (uap->flags & MS_REMOUNT)
 382  385                  why = ROOT_REMOUNT;
 383  386  
 384  387          /*
 385  388           * Open device/file mounted on.  We need this to check whether
 386  389           * the caller has sufficient rights to access the resource in
 387  390           * question.  When bio is fixed for vnodes this can all be vnode
 388  391           * operations.
 389  392           */
 390  393          if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 391  394                  goto out;
 392  395  
 393  396          /*
 394  397           * Ensure that this device isn't already mounted or in progress on a
 395  398           * mount unless this is a REMOUNT request or we are told to suppress
 396  399           * mount checks. Global mounts require special handling.
 397  400           */
 398  401          if ((uap->flags & MS_NOCHECK) == 0) {
 399  402                  if ((uap->flags & MS_GLOBAL) == 0 &&
 400  403                      vfs_devmounting(dev, vfsp)) {
 401  404                          error = EBUSY;
 402  405                          goto out;
 403  406                  }
 404  407                  if (vfs_devismounted(dev)) {
 405  408                          if ((uap->flags & MS_REMOUNT) == 0) {
 406  409                                  error = EBUSY;
 407  410                                  goto out;
 408  411                          }
 409  412                  }
 410  413          }
 411  414  
 412  415          /*
 413  416           * If the device is a tape, mount it read only
 414  417           */
 415  418          if (devopsp[getmajor(dev)]->devo_cb_ops->cb_flag & D_TAPE) {
 416  419                  vfsp->vfs_flag |= VFS_RDONLY;
 417  420                  vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 418  421          }
 419  422          if (uap->flags & MS_RDONLY)
 420  423                  vfsp->vfs_flag |= VFS_RDONLY;
 421  424  
 422  425          /*
 423  426           * Mount the filesystem, free the device vnode on error.
 424  427           */
 425  428          error = mountfs(vfsp, why, lvp != NULL ? lvp : svp,
 426  429              dpn.pn_path, cr, 0, &args, datalen);
 427  430  
 428  431          if (error == 0) {
 429  432                  vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 430  433  
 431  434                  /*
 432  435                   * If lofi, drop our reference to the original file.
 433  436                   */
 434  437                  if (lvp != NULL)
 435  438                          VN_RELE(svp);
 436  439          }
 437  440  
 438  441  out:
 439  442          pn_free(&dpn);
 440  443  
 441  444          if (error) {
 442  445                  if (lvp != NULL)
 443  446                          VN_RELE(lvp);
 444  447                  if (svp != NULL)
 445  448                          VN_RELE(svp);
 446  449          }
 447  450          return (error);
 448  451  }
 449  452  
 450  453  /*
 451  454   * Mount root file system.
 452  455   * "why" is ROOT_INIT on initial call ROOT_REMOUNT if called to
 453  456   * remount the root file system, and ROOT_UNMOUNT if called to
 454  457   * unmount the root (e.g., as part of a system shutdown).
 455  458   *
 456  459   * XXX - this may be partially machine-dependent; it, along with the VFS_SWAPVP
 457  460   * operation, goes along with auto-configuration.  A mechanism should be
 458  461   * provided by which machine-INdependent code in the kernel can say "get me the
 459  462   * right root file system" and "get me the right initial swap area", and have
 460  463   * that done in what may well be a machine-dependent fashion.
 461  464   * Unfortunately, it is also file-system-type dependent (NFS gets it via
 462  465   * bootparams calls, UFS gets it from various and sundry machine-dependent
 463  466   * mechanisms, as SPECFS does for swap).
 464  467   */
 465  468  static int
 466  469  ufs_mountroot(struct vfs *vfsp, enum whymountroot why)
 467  470  {
 468  471          struct fs *fsp;
 469  472          int error;
 470  473          static int ufsrootdone = 0;
 471  474          dev_t rootdev;
 472  475          struct vnode *vp;
 473  476          struct vnode *devvp = 0;
 474  477          int ovflags;
 475  478          int doclkset;
 476  479          ufsvfs_t *ufsvfsp;
 477  480  
 478  481          if (why == ROOT_INIT) {
 479  482                  if (ufsrootdone++)
 480  483                          return (EBUSY);
 481  484                  rootdev = getrootdev();
 482  485                  if (rootdev == (dev_t)NODEV)
 483  486                          return (ENODEV);
 484  487                  vfsp->vfs_dev = rootdev;
 485  488                  vfsp->vfs_flag |= VFS_RDONLY;
 486  489          } else if (why == ROOT_REMOUNT) {
 487  490                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 488  491                  (void) dnlc_purge_vfsp(vfsp, 0);
 489  492                  vp = common_specvp(vp);
 490  493                  (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_INVAL,
 491  494                      CRED(), NULL);
 492  495                  (void) bfinval(vfsp->vfs_dev, 0);
 493  496                  fsp = getfs(vfsp);
 494  497  
 495  498                  ovflags = vfsp->vfs_flag;
 496  499                  vfsp->vfs_flag &= ~VFS_RDONLY;
 497  500                  vfsp->vfs_flag |= VFS_REMOUNT;
 498  501                  rootdev = vfsp->vfs_dev;
 499  502          } else if (why == ROOT_UNMOUNT) {
 500  503                  if (vfs_lock(vfsp) == 0) {
 501  504                          (void) ufs_flush(vfsp);
 502  505                          /*
 503  506                           * Mark the log as fully rolled
 504  507                           */
 505  508                          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
 506  509                          fsp = ufsvfsp->vfs_fs;
 507  510                          if (TRANS_ISTRANS(ufsvfsp) &&
 508  511                              !TRANS_ISERROR(ufsvfsp) &&
 509  512                              (fsp->fs_rolled == FS_NEED_ROLL)) {
 510  513                                  ml_unit_t *ul = ufsvfsp->vfs_log;
 511  514  
 512  515                                  error = ufs_putsummaryinfo(ul->un_dev,
 513  516                                      ufsvfsp, fsp);
 514  517                                  if (error == 0) {
 515  518                                          fsp->fs_rolled = FS_ALL_ROLLED;
 516  519                                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
 517  520                                  }
 518  521                          }
 519  522                          vfs_unlock(vfsp);
 520  523                  } else {
 521  524                          ufs_update(0);
 522  525                  }
 523  526  
 524  527                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 525  528                  (void) VOP_CLOSE(vp, FREAD|FWRITE, 1,
 526  529                      (offset_t)0, CRED(), NULL);
 527  530                  return (0);
 528  531          }
 529  532          error = vfs_lock(vfsp);
 530  533          if (error)
 531  534                  return (error);
 532  535  
 533  536          devvp = makespecvp(rootdev, VBLK);
 534  537  
 535  538          /* If RO media, don't call clkset() (see below) */
 536  539          doclkset = 1;
 537  540          if (why == ROOT_INIT) {
 538  541                  error = VOP_OPEN(&devvp, FREAD|FWRITE, CRED(), NULL);
 539  542                  if (error == 0) {
 540  543                          (void) VOP_CLOSE(devvp, FREAD|FWRITE, 1,
 541  544                              (offset_t)0, CRED(), NULL);
 542  545                  } else {
 543  546                          doclkset = 0;
 544  547                  }
 545  548          }
 546  549  
 547  550          error = mountfs(vfsp, why, devvp, "/", CRED(), 1, NULL, 0);
 548  551          /*
 549  552           * XXX - assumes root device is not indirect, because we don't set
 550  553           * rootvp.  Is rootvp used for anything?  If so, make another arg
 551  554           * to mountfs.
 552  555           */
 553  556          if (error) {
 554  557                  vfs_unlock(vfsp);
 555  558                  if (why == ROOT_REMOUNT)
 556  559                          vfsp->vfs_flag = ovflags;
 557  560                  if (rootvp) {
 558  561                          VN_RELE(rootvp);
 559  562                          rootvp = (struct vnode *)0;
 560  563                  }
 561  564                  VN_RELE(devvp);
 562  565                  return (error);
 563  566          }
 564  567          if (why == ROOT_INIT)
 565  568                  vfs_add((struct vnode *)0, vfsp,
 566  569                      (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
 567  570          vfs_unlock(vfsp);
 568  571          fsp = getfs(vfsp);
 569  572          clkset(doclkset ? fsp->fs_time : -1);
 570  573          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
 571  574          if (ufsvfsp->vfs_log) {
 572  575                  vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
 573  576          }
 574  577          return (0);
 575  578  }
 576  579  
 577  580  static int
 578  581  remountfs(struct vfs *vfsp, dev_t dev, void *raw_argsp, int args_len)
 579  582  {
 580  583          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
 581  584          struct ulockfs *ulp = &ufsvfsp->vfs_ulockfs;
 582  585          struct buf *bp = ufsvfsp->vfs_bufp;
 583  586          struct fs *fsp = (struct fs *)bp->b_un.b_addr;
 584  587          struct fs *fspt;
 585  588          struct buf *tpt = 0;
 586  589          int error = 0;
 587  590          int flags = 0;
 588  591  
 589  592          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 590  593                  flags = ((struct ufs_args *)raw_argsp)->flags;
 591  594  
 592  595          /* cannot remount to RDONLY */
 593  596          if (vfsp->vfs_flag & VFS_RDONLY)
 594  597                  return (ENOTSUP);
 595  598  
 596  599          /* whoops, wrong dev */
 597  600          if (vfsp->vfs_dev != dev)
 598  601                  return (EINVAL);
 599  602  
 600  603          /*
 601  604           * synchronize w/ufs ioctls
 602  605           */
 603  606          mutex_enter(&ulp->ul_lock);
 604  607          atomic_inc_ulong(&ufs_quiesce_pend);
 605  608  
 606  609          /*
 607  610           * reset options
 608  611           */
 609  612          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
 610  613          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
 611  614          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
 612  615          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
 613  616          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
 614  617                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
 615  618          else    /* dfratime, default behavior */
 616  619                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
 617  620          if (flags & UFSMNT_FORCEDIRECTIO)
 618  621                  ufsvfsp->vfs_forcedirectio = 1;
 619  622          else    /* default is no direct I/O */
 620  623                  ufsvfsp->vfs_forcedirectio = 0;
 621  624          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
 622  625  
 623  626          /*
 624  627           * set largefiles flag in ufsvfs equal to the
 625  628           * value passed in by the mount command. If
 626  629           * it is "nolargefiles", and the flag is set
 627  630           * in the superblock, the mount fails.
 628  631           */
 629  632          if (!(flags & UFSMNT_LARGEFILES)) {  /* "nolargefiles" */
 630  633                  if (fsp->fs_flags & FSLARGEFILES) {
 631  634                          error = EFBIG;
 632  635                          goto remounterr;
 633  636                  }
 634  637                  ufsvfsp->vfs_lfflags &= ~UFS_LARGEFILES;
 635  638          } else  /* "largefiles" */
 636  639                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 637  640          /*
 638  641           * read/write to read/write; all done
 639  642           */
 640  643          if (fsp->fs_ronly == 0)
 641  644                  goto remounterr;
 642  645  
 643  646          /*
 644  647           * fix-on-panic assumes RO->RW remount implies system-critical fs
 645  648           * if it is shortly after boot; so, don't attempt to lock and fix
 646  649           * (unless the user explicitly asked for another action on error)
 647  650           * XXX UFSMNT_ONERROR_RDONLY rather than UFSMNT_ONERROR_PANIC
 648  651           */
 649  652  #define BOOT_TIME_LIMIT (180*hz)
 650  653          if (!(flags & UFSMNT_ONERROR_FLGMASK) &&
 651  654              ddi_get_lbolt() < BOOT_TIME_LIMIT) {
 652  655                  cmn_err(CE_WARN, "%s is required to be mounted onerror=%s",
 653  656                      ufsvfsp->vfs_fs->fs_fsmnt, UFSMNT_ONERROR_PANIC_STR);
 654  657                  flags |= UFSMNT_ONERROR_PANIC;
 655  658          }
 656  659  
 657  660          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
 658  661                  goto remounterr;
 659  662  
 660  663          /*
 661  664           * quiesce the file system
 662  665           */
 663  666          error = ufs_quiesce(ulp);
 664  667          if (error)
 665  668                  goto remounterr;
 666  669  
 667  670          tpt = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, SBLOCK, SBSIZE);
 668  671          if (tpt->b_flags & B_ERROR) {
 669  672                  error = EIO;
 670  673                  goto remounterr;
 671  674          }
 672  675          fspt = (struct fs *)tpt->b_un.b_addr;
 673  676          if (((fspt->fs_magic != FS_MAGIC) &&
 674  677              (fspt->fs_magic != MTB_UFS_MAGIC)) ||
 675  678              (fspt->fs_magic == FS_MAGIC &&
 676  679              (fspt->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 677  680              fspt->fs_version != UFS_VERSION_MIN)) ||
 678  681              (fspt->fs_magic == MTB_UFS_MAGIC &&
 679  682              (fspt->fs_version > MTB_UFS_VERSION_1 ||
 680  683              fspt->fs_version < MTB_UFS_VERSION_MIN)) ||
 681  684              fspt->fs_bsize > MAXBSIZE || fspt->fs_frag > MAXFRAG ||
 682  685              fspt->fs_bsize < sizeof (struct fs) || fspt->fs_bsize < PAGESIZE) {
 683  686                  tpt->b_flags |= B_STALE | B_AGE;
 684  687                  error = EINVAL;
 685  688                  goto remounterr;
 686  689          }
 687  690  
 688  691          if (ufsvfsp->vfs_log && (ufsvfsp->vfs_log->un_flags & LDL_NOROLL)) {
 689  692                  ufsvfsp->vfs_log->un_flags &= ~LDL_NOROLL;
 690  693                  logmap_start_roll(ufsvfsp->vfs_log);
 691  694          }
 692  695  
 693  696          if (TRANS_ISERROR(ufsvfsp))
 694  697                  goto remounterr;
 695  698          TRANS_DOMATAMAP(ufsvfsp);
 696  699  
 697  700          if ((fspt->fs_state + fspt->fs_time == FSOKAY) &&
 698  701              fspt->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp)) {
 699  702                  ufsvfsp->vfs_log = NULL;
 700  703                  ufsvfsp->vfs_domatamap = 0;
 701  704                  error = ENOSPC;
 702  705                  goto remounterr;
 703  706          }
 704  707  
 705  708          if (fspt->fs_state + fspt->fs_time == FSOKAY &&
 706  709              (fspt->fs_clean == FSCLEAN ||
 707  710              fspt->fs_clean == FSSTABLE ||
 708  711              fspt->fs_clean == FSLOG)) {
 709  712  
 710  713                  /*
 711  714                   * Ensure that ufs_getsummaryinfo doesn't reconstruct
 712  715                   * the summary info.
 713  716                   */
 714  717                  error = ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, fspt);
 715  718                  if (error)
 716  719                          goto remounterr;
 717  720  
 718  721                  /* preserve mount name */
 719  722                  (void) strncpy(fspt->fs_fsmnt, fsp->fs_fsmnt, MAXMNTLEN);
 720  723                  /* free the old cg space */
 721  724                  kmem_free(fsp->fs_u.fs_csp, fsp->fs_cssize);
 722  725                  /* switch in the new superblock */
 723  726                  fspt->fs_rolled = FS_NEED_ROLL;
 724  727                  bcopy(tpt->b_un.b_addr, bp->b_un.b_addr, fspt->fs_sbsize);
 725  728  
 726  729                  fsp->fs_clean = FSSTABLE;
 727  730          } /* superblock updated in memory */
 728  731          tpt->b_flags |= B_STALE | B_AGE;
 729  732          brelse(tpt);
 730  733          tpt = 0;
 731  734  
 732  735          if (fsp->fs_clean != FSSTABLE) {
 733  736                  error = ENOSPC;
 734  737                  goto remounterr;
 735  738          }
 736  739  
 737  740  
 738  741          if (TRANS_ISTRANS(ufsvfsp)) {
 739  742                  fsp->fs_clean = FSLOG;
 740  743                  ufsvfsp->vfs_dio = 0;
 741  744          } else
 742  745                  if (ufsvfsp->vfs_dio)
 743  746                          fsp->fs_clean = FSSUSPEND;
 744  747  
 745  748          TRANS_MATA_MOUNT(ufsvfsp);
 746  749  
 747  750          fsp->fs_fmod = 0;
 748  751          fsp->fs_ronly = 0;
 749  752  
 750  753          atomic_dec_ulong(&ufs_quiesce_pend);
 751  754          cv_broadcast(&ulp->ul_cv);
 752  755          mutex_exit(&ulp->ul_lock);
 753  756  
 754  757          if (TRANS_ISTRANS(ufsvfsp)) {
 755  758  
 756  759                  /*
 757  760                   * start the delete thread
 758  761                   */
 759  762                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
 760  763  
 761  764                  /*
 762  765                   * start the reclaim thread
 763  766                   */
 764  767                  if (fsp->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
 765  768                          fsp->fs_reclaim &= ~FS_RECLAIM;
 766  769                          fsp->fs_reclaim |=  FS_RECLAIMING;
 767  770                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
 768  771                              ufs_thread_reclaim, vfsp);
 769  772                  }
 770  773          }
 771  774  
 772  775          TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
 773  776  
 774  777          return (0);
 775  778  
 776  779  remounterr:
 777  780          if (tpt)
 778  781                  brelse(tpt);
 779  782          atomic_dec_ulong(&ufs_quiesce_pend);
 780  783          cv_broadcast(&ulp->ul_cv);
 781  784          mutex_exit(&ulp->ul_lock);
 782  785          return (error);
 783  786  }
 784  787  
 785  788  /*
 786  789   * If the device maxtransfer size is not available, we use ufs_maxmaxphys
 787  790   * along with the system value for maxphys to determine the value for
 788  791   * maxtransfer.
 789  792   */
 790  793  int ufs_maxmaxphys = (1024 * 1024);
 791  794  
 792  795  #include <sys/ddi.h>            /* for delay(9f) */
 793  796  
 794  797  int ufs_mount_error_delay = 20; /* default to 20ms */
 795  798  int ufs_mount_timeout = 60000;  /* default to 1 minute */
 796  799  
 797  800  static int
 798  801  mountfs(struct vfs *vfsp, enum whymountroot why, struct vnode *devvp,
 799  802      char *path, cred_t *cr, int isroot, void *raw_argsp, int args_len)
 800  803  {
 801  804          dev_t dev = devvp->v_rdev;
 802  805          struct fs *fsp;
 803  806          struct ufsvfs *ufsvfsp = 0;
 804  807          struct buf *bp = 0;
 805  808          struct buf *tp = 0;
 806  809          struct dk_cinfo ci;
 807  810          int error = 0;
 808  811          size_t len;
 809  812          int needclose = 0;
 810  813          int needtrans = 0;
 811  814          struct inode *rip;
 812  815          struct vnode *rvp = NULL;
 813  816          int flags = 0;
 814  817          kmutex_t *ihm;
 815  818          int elapsed;
 816  819          int status;
 817      -        extern  int     maxphys;
 818  820  
 819  821          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 820  822                  flags = ((struct ufs_args *)raw_argsp)->flags;
 821  823  
 822  824          ASSERT(vfs_lock_held(vfsp));
 823  825  
 824  826          if (why == ROOT_INIT) {
 825  827                  /*
 826  828                   * Open block device mounted on.
 827  829                   * When bio is fixed for vnodes this can all be vnode
 828  830                   * operations.
 829  831                   */
 830  832                  error = VOP_OPEN(&devvp,
 831  833                      (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
 832  834                      cr, NULL);
 833  835                  if (error)
 834  836                          goto out;
 835  837                  needclose = 1;
 836  838  
 837  839                  /*
 838  840                   * Refuse to go any further if this
 839  841                   * device is being used for swapping.
 840  842                   */
 841  843                  if (IS_SWAPVP(devvp)) {
 842  844                          error = EBUSY;
 843  845                          goto out;
 844  846                  }
 845  847          }
 846  848  
 847  849          /*
 848  850           * check for dev already mounted on
 849  851           */
 850  852          if (vfsp->vfs_flag & VFS_REMOUNT) {
 851  853                  error = remountfs(vfsp, dev, raw_argsp, args_len);
 852  854                  if (error == 0)
 853  855                          VN_RELE(devvp);
 854  856                  return (error);
 855  857          }
 856  858  
 857  859          ASSERT(devvp != 0);
 858  860  
 859  861          /*
 860  862           * Flush back any dirty pages on the block device to
 861  863           * try and keep the buffer cache in sync with the page
 862  864           * cache if someone is trying to use block devices when
 863  865           * they really should be using the raw device.
 864  866           */
 865  867          (void) VOP_PUTPAGE(common_specvp(devvp), (offset_t)0,
 866  868              (size_t)0, B_INVAL, cr, NULL);
 867  869  
 868  870          /*
 869  871           * read in superblock
 870  872           */
 871  873          ufsvfsp = kmem_zalloc(sizeof (struct ufsvfs), KM_SLEEP);
 872  874          tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
 873  875          if (tp->b_flags & B_ERROR)
 874  876                  goto out;
 875  877          fsp = (struct fs *)tp->b_un.b_addr;
 876  878  
 877  879          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC)) {
 878  880                  cmn_err(CE_NOTE,
 879  881                      "mount: not a UFS magic number (0x%x)", fsp->fs_magic);
 880  882                  error = EINVAL;
 881  883                  goto out;
 882  884          }
 883  885  
 884  886          if ((fsp->fs_magic == FS_MAGIC) &&
 885  887              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 886  888              fsp->fs_version != UFS_VERSION_MIN)) {
 887  889                  cmn_err(CE_NOTE,
 888  890                      "mount: unrecognized version of UFS on-disk format: %d",
 889  891                      fsp->fs_version);
 890  892                  error = EINVAL;
 891  893                  goto out;
 892  894          }
 893  895  
 894  896          if ((fsp->fs_magic == MTB_UFS_MAGIC) &&
 895  897              (fsp->fs_version > MTB_UFS_VERSION_1 ||
 896  898              fsp->fs_version < MTB_UFS_VERSION_MIN)) {
 897  899                  cmn_err(CE_NOTE,
 898  900                      "mount: unrecognized version of UFS on-disk format: %d",
 899  901                      fsp->fs_version);
 900  902                  error = EINVAL;
 901  903                  goto out;
 902  904          }
 903  905  
 904  906  #ifndef _LP64
 905  907          if (fsp->fs_magic == MTB_UFS_MAGIC) {
 906  908                  /*
 907  909                   * Find the size of the device in sectors.  If the
 908  910                   * the size in sectors is greater than INT_MAX, it's
 909  911                   * a multi-terabyte file system, which can't be
 910  912                   * mounted by a 32-bit kernel.  We can't use the
 911  913                   * fsbtodb() macro in the next line because the macro
 912  914                   * casts the intermediate values to daddr_t, which is
 913  915                   * a 32-bit quantity in a 32-bit kernel.  Here we
 914  916                   * really do need the intermediate values to be held
 915  917                   * in 64-bit quantities because we're checking for
 916  918                   * overflow of a 32-bit field.
 917  919                   */
 918  920                  if ((((diskaddr_t)(fsp->fs_size)) << fsp->fs_fsbtodb)
 919  921                      > INT_MAX) {
 920  922                          cmn_err(CE_NOTE,
 921  923                              "mount: multi-terabyte UFS cannot be"
 922  924                              " mounted by a 32-bit kernel");
 923  925                          error = EINVAL;
 924  926                          goto out;
 925  927                  }
 926  928  
 927  929          }
 928  930  #endif
 929  931  
 930  932          if (fsp->fs_bsize > MAXBSIZE || fsp->fs_frag > MAXFRAG ||
 931  933              fsp->fs_bsize < sizeof (struct fs) || fsp->fs_bsize < PAGESIZE) {
 932  934                  error = EINVAL; /* also needs translation */
 933  935                  goto out;
 934  936          }
 935  937  
 936  938          /*
 937  939           * Allocate VFS private data.
 938  940           */
 939  941          vfsp->vfs_bcount = 0;
 940  942          vfsp->vfs_data = (caddr_t)ufsvfsp;
 941  943          vfsp->vfs_fstype = ufsfstype;
 942  944          vfsp->vfs_dev = dev;
 943  945          vfsp->vfs_flag |= VFS_NOTRUNC;
 944  946          vfs_make_fsid(&vfsp->vfs_fsid, dev, ufsfstype);
 945  947          ufsvfsp->vfs_devvp = devvp;
 946  948  
 947  949          /*
 948  950           * Cross-link with vfs and add to instance list.
 949  951           */
 950  952          ufsvfsp->vfs_vfs = vfsp;
 951  953          ufs_vfs_add(ufsvfsp);
 952  954  
 953  955          ufsvfsp->vfs_dev = dev;
 954  956          ufsvfsp->vfs_bufp = tp;
 955  957  
 956  958          ufsvfsp->vfs_dirsize = INODESIZE + (4 * ALLOCSIZE) + fsp->fs_fsize;
 957  959          ufsvfsp->vfs_minfrags =
 958  960              (int)((int64_t)fsp->fs_dsize * fsp->fs_minfree / 100);
 959  961          /*
 960  962           * if mount allows largefiles, indicate so in ufsvfs
 961  963           */
 962  964          if (flags & UFSMNT_LARGEFILES)
 963  965                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 964  966          /*
 965  967           * Initialize threads
 966  968           */
 967  969          ufs_delete_init(ufsvfsp, 1);
 968  970          ufs_thread_init(&ufsvfsp->vfs_reclaim, 0);
 969  971  
 970  972          /*
 971  973           * Chicken and egg problem. The superblock may have deltas
 972  974           * in the log.  So after the log is scanned we reread the
 973  975           * superblock. We guarantee that the fields needed to
 974  976           * scan the log will not be in the log.
 975  977           */
 976  978          if (fsp->fs_logbno && fsp->fs_clean == FSLOG &&
 977  979              (fsp->fs_state + fsp->fs_time == FSOKAY)) {
 978  980                  error = lufs_snarf(ufsvfsp, fsp, (vfsp->vfs_flag & VFS_RDONLY));
 979  981                  if (error) {
 980  982                          /*
 981  983                           * Allow a ro mount to continue even if the
 982  984                           * log cannot be processed - yet.
 983  985                           */
 984  986                          if (!(vfsp->vfs_flag & VFS_RDONLY)) {
 985  987                                  cmn_err(CE_WARN, "Error accessing ufs "
 986  988                                      "log for %s; Please run fsck(1M)", path);
 987  989                                  goto out;
 988  990                          }
 989  991                  }
 990  992                  tp->b_flags |= (B_AGE | B_STALE);
 991  993                  brelse(tp);
 992  994                  tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
 993  995                  fsp = (struct fs *)tp->b_un.b_addr;
 994  996                  ufsvfsp->vfs_bufp = tp;
 995  997                  if (tp->b_flags & B_ERROR)
 996  998                          goto out;
 997  999          }
 998 1000  
 999 1001          /*
1000 1002           * Set logging mounted flag used by lockfs
1001 1003           */
1002 1004          ufsvfsp->vfs_validfs = UT_MOUNTED;
1003 1005  
1004 1006          /*
1005 1007           * Copy the super block into a buffer in its native size.
1006 1008           * Use ngeteblk to allocate the buffer
1007 1009           */
1008 1010          bp = ngeteblk(fsp->fs_bsize);
1009 1011          ufsvfsp->vfs_bufp = bp;
1010 1012          bp->b_edev = dev;
1011 1013          bp->b_dev = cmpdev(dev);
1012 1014          bp->b_blkno = SBLOCK;
1013 1015          bp->b_bcount = fsp->fs_sbsize;
1014 1016          bcopy(tp->b_un.b_addr, bp->b_un.b_addr, fsp->fs_sbsize);
1015 1017          tp->b_flags |= B_STALE | B_AGE;
1016 1018          brelse(tp);
1017 1019          tp = 0;
1018 1020  
1019 1021          fsp = (struct fs *)bp->b_un.b_addr;
1020 1022          /*
1021 1023           * Mount fails if superblock flag indicates presence of large
1022 1024           * files and filesystem is attempted to be mounted 'nolargefiles'.
1023 1025           * The exception is for a read only mount of root, which we
1024 1026           * always want to succeed, so fsck can fix potential problems.
1025 1027           * The assumption is that we will remount root at some point,
1026 1028           * and the remount will enforce the mount option.
1027 1029           */
1028 1030          if (!(isroot & (vfsp->vfs_flag & VFS_RDONLY)) &&
1029 1031              (fsp->fs_flags & FSLARGEFILES) &&
1030 1032              !(flags & UFSMNT_LARGEFILES)) {
1031 1033                  error = EFBIG;
1032 1034                  goto out;
1033 1035          }
1034 1036  
1035 1037          if (vfsp->vfs_flag & VFS_RDONLY) {
1036 1038                  fsp->fs_ronly = 1;
1037 1039                  fsp->fs_fmod = 0;
1038 1040                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1039 1041                      ((fsp->fs_clean == FSCLEAN) ||
1040 1042                      (fsp->fs_clean == FSSTABLE) ||
1041 1043                      (fsp->fs_clean == FSLOG))) {
1042 1044                          if (isroot) {
1043 1045                                  if (fsp->fs_clean == FSLOG) {
1044 1046                                          if (fsp->fs_rolled == FS_ALL_ROLLED) {
1045 1047                                                  ufs_clean_root = 1;
1046 1048                                          }
1047 1049                                  } else {
1048 1050                                          ufs_clean_root = 1;
1049 1051                                  }
1050 1052                          }
1051 1053                          fsp->fs_clean = FSSTABLE;
1052 1054                  } else {
1053 1055                          fsp->fs_clean = FSBAD;
1054 1056                  }
1055 1057          } else {
1056 1058  
1057 1059                  fsp->fs_fmod = 0;
1058 1060                  fsp->fs_ronly = 0;
1059 1061  
1060 1062                  TRANS_DOMATAMAP(ufsvfsp);
1061 1063  
1062 1064                  if ((TRANS_ISERROR(ufsvfsp)) ||
1063 1065                      (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1064 1066                      fsp->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp))) {
1065 1067                          ufsvfsp->vfs_log = NULL;
1066 1068                          ufsvfsp->vfs_domatamap = 0;
1067 1069                          error = ENOSPC;
1068 1070                          goto out;
1069 1071                  }
1070 1072  
1071 1073                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1072 1074                      (fsp->fs_clean == FSCLEAN ||
1073 1075                      fsp->fs_clean == FSSTABLE ||
1074 1076                      fsp->fs_clean == FSLOG))
1075 1077                          fsp->fs_clean = FSSTABLE;
1076 1078                  else {
1077 1079                          if (isroot) {
1078 1080                                  /*
1079 1081                                   * allow root partition to be mounted even
1080 1082                                   * when fs_state is not ok
1081 1083                                   * will be fixed later by a remount root
1082 1084                                   */
1083 1085                                  fsp->fs_clean = FSBAD;
1084 1086                                  ufsvfsp->vfs_log = NULL;
1085 1087                                  ufsvfsp->vfs_domatamap = 0;
1086 1088                          } else {
1087 1089                                  error = ENOSPC;
1088 1090                                  goto out;
1089 1091                          }
1090 1092                  }
1091 1093  
1092 1094                  if (fsp->fs_clean == FSSTABLE && TRANS_ISTRANS(ufsvfsp))
1093 1095                          fsp->fs_clean = FSLOG;
1094 1096          }
1095 1097          TRANS_MATA_MOUNT(ufsvfsp);
1096 1098          needtrans = 1;
1097 1099  
1098 1100          vfsp->vfs_bsize = fsp->fs_bsize;
1099 1101  
1100 1102          /*
1101 1103           * Read in summary info
1102 1104           */
1103 1105          if (error = ufs_getsummaryinfo(dev, ufsvfsp, fsp))
1104 1106                  goto out;
1105 1107  
1106 1108          /*
1107 1109           * lastwhinetime is set to zero rather than lbolt, so that after
1108 1110           * mounting if the filesystem is found to be full, then immediately the
1109 1111           * "file system message" will be logged.
1110 1112           */
1111 1113          ufsvfsp->vfs_lastwhinetime = 0L;
1112 1114  
1113 1115  
1114 1116          mutex_init(&ufsvfsp->vfs_lock, NULL, MUTEX_DEFAULT, NULL);
1115 1117          (void) copystr(path, fsp->fs_fsmnt, sizeof (fsp->fs_fsmnt) - 1, &len);
1116 1118          bzero(fsp->fs_fsmnt + len, sizeof (fsp->fs_fsmnt) - len);
1117 1119  
1118 1120          /*
1119 1121           * Sanity checks for old file systems
1120 1122           */
1121 1123          if (fsp->fs_postblformat == FS_42POSTBLFMT)
1122 1124                  ufsvfsp->vfs_nrpos = 8;
1123 1125          else
1124 1126                  ufsvfsp->vfs_nrpos = fsp->fs_nrpos;
1125 1127  
1126 1128          /*
1127 1129           * Initialize lockfs structure to support file system locking
1128 1130           */
1129 1131          bzero(&ufsvfsp->vfs_ulockfs.ul_lockfs,
1130 1132              sizeof (struct lockfs));
1131 1133          ufsvfsp->vfs_ulockfs.ul_fs_lock = ULOCKFS_ULOCK;
1132 1134          mutex_init(&ufsvfsp->vfs_ulockfs.ul_lock, NULL,
1133 1135              MUTEX_DEFAULT, NULL);
1134 1136          cv_init(&ufsvfsp->vfs_ulockfs.ul_cv, NULL, CV_DEFAULT, NULL);
1135 1137  
1136 1138          /*
1137 1139           * We don't need to grab vfs_dqrwlock for this ufs_iget() call.
1138 1140           * We are in the process of mounting the file system so there
1139 1141           * is no need to grab the quota lock. If a quota applies to the
1140 1142           * root inode, then it will be updated when quotas are enabled.
1141 1143           *
1142 1144           * However, we have an ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock))
1143 1145           * in getinoquota() that we want to keep so grab it anyway.
1144 1146           */
1145 1147          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1146 1148  
1147 1149          error = ufs_iget_alloced(vfsp, UFSROOTINO, &rip, cr);
1148 1150  
1149 1151          rw_exit(&ufsvfsp->vfs_dqrwlock);
1150 1152  
1151 1153          if (error)
1152 1154                  goto out;
1153 1155  
1154 1156          /*
1155 1157           * make sure root inode is a directory.  Returning ENOTDIR might
1156 1158           * be confused with the mount point not being a directory, so
1157 1159           * we use EIO instead.
1158 1160           */
1159 1161          if ((rip->i_mode & IFMT) != IFDIR) {
1160 1162                  /*
1161 1163                   * Mark this inode as subject for cleanup
1162 1164                   * to avoid stray inodes in the cache.
1163 1165                   */
1164 1166                  rvp = ITOV(rip);
1165 1167                  error = EIO;
1166 1168                  goto out;
1167 1169          }
1168 1170  
1169 1171          rvp = ITOV(rip);
1170 1172          mutex_enter(&rvp->v_lock);
1171 1173          rvp->v_flag |= VROOT;
1172 1174          mutex_exit(&rvp->v_lock);
1173 1175          ufsvfsp->vfs_root = rvp;
1174 1176          /* The buffer for the root inode does not contain a valid b_vp */
1175 1177          (void) bfinval(dev, 0);
1176 1178  
1177 1179          /* options */
1178 1180          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
1179 1181          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
1180 1182          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
1181 1183          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
1182 1184          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
1183 1185                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
1184 1186          else    /* dfratime, default behavior */
1185 1187                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
1186 1188          if (flags & UFSMNT_FORCEDIRECTIO)
1187 1189                  ufsvfsp->vfs_forcedirectio = 1;
1188 1190          else if (flags & UFSMNT_NOFORCEDIRECTIO)
1189 1191                  ufsvfsp->vfs_forcedirectio = 0;
1190 1192          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1191 1193  
1192 1194          ufsvfsp->vfs_nindiroffset = fsp->fs_nindir - 1;
1193 1195          ufsvfsp->vfs_nindirshift = highbit(ufsvfsp->vfs_nindiroffset);
1194 1196          ufsvfsp->vfs_ioclustsz = fsp->fs_bsize * fsp->fs_maxcontig;
1195 1197  
1196 1198          if (cdev_ioctl(dev, DKIOCINFO, (intptr_t)&ci,
1197 1199              FKIOCTL|FNATIVE|FREAD, CRED(), &status) == 0) {
1198 1200                  ufsvfsp->vfs_iotransz = ci.dki_maxtransfer * DEV_BSIZE;
1199 1201          } else {
1200 1202                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1201 1203          }
1202 1204  
1203 1205          if (ufsvfsp->vfs_iotransz <= 0) {
1204 1206                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1205 1207          }
1206 1208  
1207 1209          /*
1208 1210           * When logging, used to reserve log space for writes and truncs
1209 1211           */
1210 1212          ufsvfsp->vfs_avgbfree = fsp->fs_cstotal.cs_nbfree / fsp->fs_ncg;
1211 1213  
1212 1214          /*
1213 1215           * Determine whether to log cylinder group summary info.
1214 1216           */
1215 1217          ufsvfsp->vfs_nolog_si = (fsp->fs_ncg < ufs_ncg_log);
1216 1218  
1217 1219          if (TRANS_ISTRANS(ufsvfsp)) {
1218 1220                  /*
1219 1221                   * start the delete thread
1220 1222                   */
1221 1223                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
1222 1224  
1223 1225                  /*
1224 1226                   * start reclaim thread if the filesystem was not mounted
1225 1227                   * read only.
1226 1228                   */
1227 1229                  if (!fsp->fs_ronly && (fsp->fs_reclaim &
1228 1230                      (FS_RECLAIM|FS_RECLAIMING))) {
1229 1231                          fsp->fs_reclaim &= ~FS_RECLAIM;
1230 1232                          fsp->fs_reclaim |=  FS_RECLAIMING;
1231 1233                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
1232 1234                              ufs_thread_reclaim, vfsp);
1233 1235                  }
1234 1236  
1235 1237                  /* Mark the fs as unrolled */
1236 1238                  fsp->fs_rolled = FS_NEED_ROLL;
1237 1239          } else if (!fsp->fs_ronly && (fsp->fs_reclaim &
1238 1240              (FS_RECLAIM|FS_RECLAIMING))) {
1239 1241                  /*
1240 1242                   * If a file system that is mounted nologging, after
1241 1243                   * having previously been mounted logging, becomes
1242 1244                   * unmounted whilst the reclaim thread is in the throes
1243 1245                   * of reclaiming open/deleted inodes, a subsequent mount
1244 1246                   * of such a file system with logging disabled could lead
1245 1247                   * to inodes becoming lost.  So, start reclaim now, even
1246 1248                   * though logging was disabled for the previous mount, to
1247 1249                   * tidy things up.
1248 1250                   */
1249 1251                  fsp->fs_reclaim &= ~FS_RECLAIM;
1250 1252                  fsp->fs_reclaim |=  FS_RECLAIMING;
1251 1253                  ufs_thread_start(&ufsvfsp->vfs_reclaim,
1252 1254                      ufs_thread_reclaim, vfsp);
1253 1255          }
1254 1256  
1255 1257          if (!fsp->fs_ronly) {
1256 1258                  TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
1257 1259                  if (error = geterror(ufsvfsp->vfs_bufp))
1258 1260                          goto out;
1259 1261          }
1260 1262  
1261 1263          /* fix-on-panic initialization */
1262 1264          if (isroot && !(flags & UFSMNT_ONERROR_FLGMASK))
1263 1265                  flags |= UFSMNT_ONERROR_PANIC;  /* XXX ..._RDONLY */
1264 1266  
1265 1267          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
1266 1268                  goto out;
1267 1269  
1268 1270          if (why == ROOT_INIT && isroot)
1269 1271                  rootvp = devvp;
1270 1272  
1271 1273          return (0);
1272 1274  out:
1273 1275          if (error == 0)
1274 1276                  error = EIO;
1275 1277          if (rvp) {
1276 1278                  /* the following sequence is similar to ufs_unmount() */
1277 1279  
1278 1280                  /*
1279 1281                   * There's a problem that ufs_iget() puts inodes into
1280 1282                   * the inode cache before it returns them.  If someone
1281 1283                   * traverses that cache and gets a reference to our
1282 1284                   * inode, there's a chance they'll still be using it
1283 1285                   * after we've destroyed it.  This is a hard race to
1284 1286                   * hit, but it's happened (putting in a medium delay
1285 1287                   * here, and a large delay in ufs_scan_inodes() for
1286 1288                   * inodes on the device we're bailing out on, makes
1287 1289                   * the race easy to demonstrate).  The symptom is some
1288 1290                   * other part of UFS faulting on bad inode contents,
1289 1291                   * or when grabbing one of the locks inside the inode,
1290 1292                   * etc.  The usual victim is ufs_scan_inodes() or
1291 1293                   * someone called by it.
1292 1294                   */
1293 1295  
1294 1296                  /*
1295 1297                   * First, isolate it so that no new references can be
1296 1298                   * gotten via the inode cache.
1297 1299                   */
1298 1300                  ihm = &ih_lock[INOHASH(UFSROOTINO)];
1299 1301                  mutex_enter(ihm);
1300 1302                  remque(rip);
1301 1303                  mutex_exit(ihm);
1302 1304  
1303 1305                  /*
1304 1306                   * Now wait for all outstanding references except our
1305 1307                   * own to drain.  This could, in theory, take forever,
1306 1308                   * so don't wait *too* long.  If we time out, mark
1307 1309                   * it stale and leak it, so we don't hit the problem
1308 1310                   * described above.
1309 1311                   *
1310 1312                   * Note that v_count is an int, which means we can read
1311 1313                   * it in one operation.  Thus, there's no need to lock
1312 1314                   * around our tests.
1313 1315                   */
1314 1316                  elapsed = 0;
1315 1317                  while ((rvp->v_count > 1) && (elapsed < ufs_mount_timeout)) {
1316 1318                          delay(ufs_mount_error_delay * drv_usectohz(1000));
1317 1319                          elapsed += ufs_mount_error_delay;
1318 1320                  }
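
The wait above polls rvp->v_count in steps of ufs_mount_error_delay milliseconds, up to ufs_mount_timeout in total. If those tunables are meant to be patched at run time (for example from a kernel debugger), declaring them volatile keeps the compiler from caching their values across iterations of this loop. A small user-space sketch of the same bounded-polling pattern, not part of the patch; the demo_* names and the refcount parameter are invented:

    #include <stdio.h>
    #include <unistd.h>

    /* Tunables: volatile so a concurrent or debugger write is re-read each pass. */
    volatile int demo_mount_timeout = 20000;        /* total wait, ms     */
    volatile int demo_mount_error_delay = 20;       /* per-step delay, ms */

    static int
    wait_for_drain(volatile int *refcount)
    {
            int elapsed = 0;

            while (*refcount > 1 && elapsed < demo_mount_timeout) {
                    (void) usleep(demo_mount_error_delay * 1000);
                    elapsed += demo_mount_error_delay;
            }
            return (*refcount <= 1);        /* 1 = drained, 0 = timed out */
    }

    int
    main(void)
    {
            volatile int count = 1;         /* only our own reference left */

            printf("drained: %d\n", wait_for_drain(&count));
            return (0);
    }
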
1319 1321  
1320 1322                  if (rvp->v_count > 1) {
1321 1323                          mutex_enter(&rip->i_tlock);
1322 1324                          rip->i_flag |= ISTALE;
1323 1325                          mutex_exit(&rip->i_tlock);
1324 1326                          cmn_err(CE_WARN,
1325 1327                              "Timed out while cleaning up after "
1326 1328                              "failed mount of %s", path);
1327 1329                  } else {
1328 1330  
1329 1331                          /*
1330 1332                           * Now we're the only one with a handle left, so tear
1331 1333                           * it down the rest of the way.
1332 1334                           */
1333 1335                          if (ufs_rmidle(rip))
1334 1336                                  VN_RELE(rvp);
1335 1337                          ufs_si_del(rip);
1336 1338                          rip->i_ufsvfs = NULL;
1337 1339                          rvp->v_vfsp = NULL;
1338 1340                          rvp->v_type = VBAD;
1339 1341                          VN_RELE(rvp);
1340 1342                  }
1341 1343          }
1342 1344          if (needtrans) {
1343 1345                  TRANS_MATA_UMOUNT(ufsvfsp);
1344 1346          }
1345 1347          if (ufsvfsp) {
1346 1348                  ufs_vfs_remove(ufsvfsp);
1347 1349                  ufs_thread_exit(&ufsvfsp->vfs_delete);
1348 1350                  ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1349 1351                  mutex_destroy(&ufsvfsp->vfs_lock);
1350 1352                  if (ufsvfsp->vfs_log) {
1351 1353                          lufs_unsnarf(ufsvfsp);
1352 1354                  }
1353 1355                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1354 1356          }
1355 1357          if (bp) {
1356 1358                  bp->b_flags |= (B_STALE|B_AGE);
1357 1359                  brelse(bp);
1358 1360          }
1359 1361          if (tp) {
1360 1362                  tp->b_flags |= (B_STALE|B_AGE);
1361 1363                  brelse(tp);
1362 1364          }
1363 1365          if (needclose) {
1364 1366                  (void) VOP_CLOSE(devvp, (vfsp->vfs_flag & VFS_RDONLY) ?
1365 1367                      FREAD : FREAD|FWRITE, 1, (offset_t)0, cr, NULL);
1366 1368                  bflush(dev);
1367 1369                  (void) bfinval(dev, 1);
1368 1370          }
1369 1371          return (error);
1370 1372  }
1371 1373  
1372 1374  /*
1373 1375   * vfs operations
1374 1376   */
1375 1377  static int
1376 1378  ufs_unmount(struct vfs *vfsp, int fflag, struct cred *cr)
1377 1379  {
1378 1380          dev_t           dev             = vfsp->vfs_dev;
1379 1381          struct ufsvfs   *ufsvfsp        = (struct ufsvfs *)vfsp->vfs_data;
1380 1382          struct fs       *fs             = ufsvfsp->vfs_fs;
1381 1383          struct ulockfs  *ulp            = &ufsvfsp->vfs_ulockfs;
1382 1384          struct vnode    *bvp, *vp;
1383 1385          struct buf      *bp;
1384 1386          struct inode    *ip, *inext, *rip;
1385 1387          union ihead     *ih;
1386 1388          int             error, flag, i;
1387 1389          struct lockfs   lockfs;
1388 1390          int             poll_events = POLLPRI;
1389 1391          extern struct pollhead ufs_pollhd;
1390 1392          refstr_t        *mountpoint;
1391 1393  
1392 1394          ASSERT(vfs_lock_held(vfsp));
1393 1395  
1394 1396          if (secpolicy_fs_unmount(cr, vfsp) != 0)
1395 1397                  return (EPERM);
1396 1398          /*
1397 1399           * Forced unmount is now supported through the
1398 1400           * lockfs protocol.
1399 1401           */
1400 1402          if (fflag & MS_FORCE) {
1401 1403                  /*
1402 1404                   * Mark the filesystem as being unmounted now in
1403 1405                   * case of a forcible umount before we take any
1404 1406                   * locks inside UFS to prevent racing with a VFS_VGET()
1405 1407                   * request. Throw these VFS_VGET() requests away for
1406 1408                   * the duration of the forcible umount so they won't
1407 1409                   * use stale or even freed data later on when we're done.
1408 1410                   * It may happen that the VFS has had an additional hold
1409 1411                   * placed on it by someone other than UFS and thus will
1410 1412                   * not get freed immediately once we're done with the
1411 1413                   * umount by dounmount() - use VFS_UNMOUNTED to inform
1412 1414                   * users of this still-alive VFS that its corresponding
1413 1415                   * filesystem is gone so they can detect that and error
1414 1416                   * out.
1415 1417                   */
1416 1418                  vfsp->vfs_flag |= VFS_UNMOUNTED;
1417 1419  
1418 1420                  ufs_thread_suspend(&ufsvfsp->vfs_delete);
1419 1421                  mutex_enter(&ulp->ul_lock);
1420 1422                  /*
1421 1423                   * If file system is already hard locked,
1422 1424                   * unmount the file system, otherwise
1423 1425                   * hard lock it before unmounting.
1424 1426                   */
1425 1427                  if (!ULOCKFS_IS_HLOCK(ulp)) {
1426 1428                          atomic_inc_ulong(&ufs_quiesce_pend);
1427 1429                          lockfs.lf_lock = LOCKFS_HLOCK;
1428 1430                          lockfs.lf_flags = 0;
1429 1431                          lockfs.lf_key = ulp->ul_lockfs.lf_key + 1;
1430 1432                          lockfs.lf_comlen = 0;
1431 1433                          lockfs.lf_comment = NULL;
1432 1434                          ufs_freeze(ulp, &lockfs);
1433 1435                          ULOCKFS_SET_BUSY(ulp);
1434 1436                          LOCKFS_SET_BUSY(&ulp->ul_lockfs);
1435 1437                          (void) ufs_quiesce(ulp);
1436 1438                          (void) ufs_flush(vfsp);
1437 1439                          (void) ufs_thaw(vfsp, ufsvfsp, ulp);
1438 1440                          atomic_dec_ulong(&ufs_quiesce_pend);
1439 1441                          ULOCKFS_CLR_BUSY(ulp);
1440 1442                          LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1441 1443                          poll_events |= POLLERR;
1442 1444                          pollwakeup(&ufs_pollhd, poll_events);
1443 1445                  }
1444 1446                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1445 1447                  mutex_exit(&ulp->ul_lock);
1446 1448          }
1447 1449  
1448 1450          /* let all types of writes go through */
1449 1451          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1450 1452  
1451 1453          /* coordinate with global hlock thread */
1452 1454          if (TRANS_ISTRANS(ufsvfsp) && (ufsvfsp->vfs_validfs == UT_HLOCKING)) {
1453 1455                  /*
1454 1456                   * last possibility for a forced umount to fail hence clear
1455 1457                   * VFS_UNMOUNTED if appropriate.
1456 1458                   */
1457 1459                  if (fflag & MS_FORCE)
1458 1460                          vfsp->vfs_flag &= ~VFS_UNMOUNTED;
1459 1461                  return (EAGAIN);
1460 1462          }
1461 1463  
1462 1464          ufsvfsp->vfs_validfs = UT_UNMOUNTED;
1463 1465  
1464 1466          /* kill the reclaim thread */
1465 1467          ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1466 1468  
1467 1469          /* suspend the delete thread */
1468 1470          ufs_thread_suspend(&ufsvfsp->vfs_delete);
1469 1471  
1470 1472          /*
1471 1473           * drain the delete and idle queues
1472 1474           */
1473 1475          ufs_delete_drain(vfsp, -1, 1);
1474 1476          ufs_idle_drain(vfsp);
1475 1477  
1476 1478          /*
1477 1479           * use the lockfs protocol to prevent new ops from starting;
1478 1480           * a forcible umount cannot fail beyond this point, as
1479 1481           * we hard-locked the filesystem and drained all current consumers
1480 1482           * before.
1481 1483           */
1482 1484          mutex_enter(&ulp->ul_lock);
1483 1485  
1484 1486          /*
1485 1487           * if the file system is busy; return EBUSY
1486 1488           * if the file system is busy, return EBUSY
1487 1489          if (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt || ULOCKFS_IS_SLOCK(ulp)) {
1488 1490                  error = EBUSY;
1489 1491                  goto out;
1490 1492          }
1491 1493  
1492 1494          /*
1493 1495           * if this is not a forced unmount (!hard/error locked), then
1494 1496           * get rid of every inode except the root and quota inodes
1495 1497           * also, commit any outstanding transactions
1496 1498           */
1497 1499          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
1498 1500                  if (error = ufs_flush(vfsp))
1499 1501                          goto out;
1500 1502  
1501 1503          /*
1502 1504           * ignore inodes in the cache if fs is hard locked or error locked
1503 1505           */
1504 1506          rip = VTOI(ufsvfsp->vfs_root);
1505 1507          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) {
1506 1508                  /*
1507 1509                   * Otherwise, only the quota and root inodes are in the cache.
1508 1510                   *
1509 1511                   * Avoid racing with ufs_update() and ufs_sync().
1510 1512                   */
1511 1513                  mutex_enter(&ufs_scan_lock);
1512 1514  
1513 1515                  for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1514 1516                          mutex_enter(&ih_lock[i]);
1515 1517                          for (ip = ih->ih_chain[0];
1516 1518                              ip != (struct inode *)ih;
1517 1519                              ip = ip->i_forw) {
1518 1520                                  if (ip->i_ufsvfs != ufsvfsp)
1519 1521                                          continue;
1520 1522                                  if (ip == ufsvfsp->vfs_qinod)
1521 1523                                          continue;
1522 1524                                  if (ip == rip && ITOV(ip)->v_count == 1)
1523 1525                                          continue;
1524 1526                                  mutex_exit(&ih_lock[i]);
1525 1527                                  mutex_exit(&ufs_scan_lock);
1526 1528                                  error = EBUSY;
1527 1529                                  goto out;
1528 1530                          }
1529 1531                          mutex_exit(&ih_lock[i]);
1530 1532                  }
1531 1533                  mutex_exit(&ufs_scan_lock);
1532 1534          }
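
The scan above declares the filesystem busy as soon as it finds any cached inode that belongs to this mount and is not one of the expected survivors (the quota inode, or the root inode with a single hold). A simplified, lock-free user-space walk of one chain, not part of the patch; struct node and its fields are invented:

    #include <stdio.h>
    #include <stddef.h>

    struct node {
            struct node *next;
            int owner;              /* which filesystem it belongs to */
            int exempt;             /* root/quota analogue            */
    };

    static int
    chain_is_busy(const struct node *head, int owner)
    {
            const struct node *n;

            for (n = head; n != NULL; n = n->next) {
                    if (n->owner != owner)
                            continue;       /* some other filesystem */
                    if (n->exempt)
                            continue;       /* allowed to remain     */
                    return (1);             /* anything else: busy   */
            }
            return (0);
    }

    int
    main(void)
    {
            struct node c = { NULL, 1, 0 }; /* stray inode of fs 1  */
            struct node b = { &c, 1, 1 };   /* exempt inode of fs 1 */
            struct node a = { &b, 2, 0 };   /* inode of another fs  */

            printf("busy: %d\n", chain_is_busy(&a, 1)); /* prints 1 */
            return (0);
    }
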
1533 1535  
1534 1536          /*
1535 1537           * if a snapshot exists and this is a forced unmount, then delete
1536 1538           * the snapshot.  Otherwise return EBUSY.  This will ensure the
1537 1539           * snapshot always belongs to a valid file system.
1538 1540           */
1539 1541          if (ufsvfsp->vfs_snapshot) {
1540 1542                  if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1541 1543                          (void) fssnap_delete(&ufsvfsp->vfs_snapshot);
1542 1544                  } else {
1543 1545                          error = EBUSY;
1544 1546                          goto out;
1545 1547                  }
1546 1548          }
1547 1549  
1548 1550          /*
1549 1551           * Close the quota file and invalidate anything left in the quota
1550 1552           * cache for this file system.  Pass kcred to allow all quota
1551 1553           * manipulations.
1552 1554           */
1553 1555          (void) closedq(ufsvfsp, kcred);
1554 1556          invalidatedq(ufsvfsp);
1555 1557          /*
1556 1558           * drain the delete and idle queues
1557 1559           */
1558 1560          ufs_delete_drain(vfsp, -1, 0);
1559 1561          ufs_idle_drain(vfsp);
1560 1562  
1561 1563          /*
1562 1564           * discard the inodes for this fs (including root, shadow, and quota)
1563 1565           */
1564 1566          for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1565 1567                  mutex_enter(&ih_lock[i]);
1566 1568                  for (inext = 0, ip = ih->ih_chain[0];
1567 1569                      ip != (struct inode *)ih;
1568 1570                      ip = inext) {
1569 1571                          inext = ip->i_forw;
1570 1572                          if (ip->i_ufsvfs != ufsvfsp)
1571 1573                                  continue;
1572 1574  
1573 1575                          /*
1574 1576                           * We've found the inode in the cache and as we
1575 1577                           * hold the hash mutex the inode can not
1576 1578                           * disappear from underneath us.
1577 1579                           * We also know it must have at least a vnode
1578 1580                           * reference count of 1.
1579 1581                           * We perform an additional VN_HOLD so that the VN_RELE
1580 1582                           * issued in case we take the inode off the idle queue
1581 1583                           * cannot be the last one.
1582 1584                           * It is safe to grab the writer contents lock here
1583 1585                           * to prevent a race with ufs_iinactive() putting
1584 1586                           * inodes into the idle queue while we operate on
1585 1587                           * this inode.
1586 1588                           */
1587 1589                          rw_enter(&ip->i_contents, RW_WRITER);
1588 1590  
1589 1591                          vp = ITOV(ip);
1590 1592                          VN_HOLD(vp)
1591 1593                          remque(ip);
1592 1594                          if (ufs_rmidle(ip))
1593 1595                                  VN_RELE(vp);
1594 1596                          ufs_si_del(ip);
1595 1597                          /*
1596 1598                           * rip->i_ufsvfsp is needed by bflush()
1597 1599                           */
1598 1600                          if (ip != rip)
1599 1601                                  ip->i_ufsvfs = NULL;
1600 1602                          /*
1601 1603                           * Set vnode's vfsops to dummy ops, which return
1602 1604                           * EIO. This is needed for forced unmounts to work
1603 1605                           * with lofs/nfs properly.
1604 1606                           */
1605 1607                          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp))
1606 1608                                  vp->v_vfsp = &EIO_vfs;
1607 1609                          else
1608 1610                                  vp->v_vfsp = NULL;
1609 1611                          vp->v_type = VBAD;
1610 1612  
1611 1613                          rw_exit(&ip->i_contents);
1612 1614  
1613 1615                          VN_RELE(vp);
1614 1616                  }
1615 1617                  mutex_exit(&ih_lock[i]);
1616 1618          }
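
The teardown loop above takes an extra hold on each vnode first, so that the conditional release for the idle queue can never be the one that drops the count to zero; only the release at the end of the loop body may be the last. A tiny user-space reference-count sketch of that ordering, not part of the patch; struct obj, obj_rele and teardown are invented names:

    #include <stdio.h>

    struct obj {
            int refcnt;
            int freed;
    };

    static void
    obj_rele(struct obj *o)
    {
            if (--o->refcnt == 0)
                    o->freed = 1;           /* stands in for the real free */
    }

    static void
    teardown(struct obj *o, int on_idle_queue)
    {
            o->refcnt++;                    /* extra hold (VN_HOLD analogue) */
            if (on_idle_queue)
                    obj_rele(o);            /* drop the idle queue's hold    */
            obj_rele(o);                    /* drop our extra hold last      */
    }

    int
    main(void)
    {
            struct obj o = { 2, 0 };        /* cache hold + idle queue hold */

            teardown(&o, 1);
            printf("refcnt=%d freed=%d\n", o.refcnt, o.freed);      /* 1, 0 */
            return (0);
    }
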
1617 1619          ufs_si_cache_flush(dev);
1618 1620  
1619 1621          /*
1620 1622           * kill the delete thread and drain the idle queue
1621 1623           */
1622 1624          ufs_thread_exit(&ufsvfsp->vfs_delete);
1623 1625          ufs_idle_drain(vfsp);
1624 1626  
1625 1627          bp = ufsvfsp->vfs_bufp;
1626 1628          bvp = ufsvfsp->vfs_devvp;
1627 1629          flag = !fs->fs_ronly;
1628 1630          if (flag) {
1629 1631                  bflush(dev);
1630 1632                  if (fs->fs_clean != FSBAD) {
1631 1633                          if (fs->fs_clean == FSSTABLE)
1632 1634                                  fs->fs_clean = FSCLEAN;
1633 1635                          fs->fs_reclaim &= ~FS_RECLAIM;
1634 1636                  }
1635 1637                  if (TRANS_ISTRANS(ufsvfsp) &&
1636 1638                      !TRANS_ISERROR(ufsvfsp) &&
1637 1639                      !ULOCKFS_IS_HLOCK(ulp) &&
1638 1640                      (fs->fs_rolled == FS_NEED_ROLL)) {
1639 1641                          /*
1640 1642                           * ufs_flush() above has flushed the last Moby.
1641 1643                           * This is needed to ensure the following superblock
1642 1644                           * update really is the last metadata update
1643 1645                           */
1644 1646                          error = ufs_putsummaryinfo(dev, ufsvfsp, fs);
1645 1647                          if (error == 0) {
1646 1648                                  fs->fs_rolled = FS_ALL_ROLLED;
1647 1649                          }
1648 1650                  }
1649 1651                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UNMOUNT);
1650 1652                  /*
1651 1653                   * push this last transaction
1652 1654                   */
1653 1655                  curthread->t_flag |= T_DONTBLOCK;
1654 1656                  TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UNMOUNT, TOP_COMMIT_SIZE,
1655 1657                      error);
1656 1658                  if (!error)
1657 1659                          TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UNMOUNT,
1658 1660                              TOP_COMMIT_SIZE);
1659 1661                  curthread->t_flag &= ~T_DONTBLOCK;
1660 1662          }
1661 1663  
1662 1664          TRANS_MATA_UMOUNT(ufsvfsp);
1663 1665          lufs_unsnarf(ufsvfsp);          /* Release the in-memory structs */
1664 1666          ufsfx_unmount(ufsvfsp);         /* fix-on-panic bookkeeping */
1665 1667          kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
1666 1668  
1667 1669          bp->b_flags |= B_STALE|B_AGE;
1668 1670          ufsvfsp->vfs_bufp = NULL;       /* don't point at freed buf */
1669 1671          brelse(bp);                     /* free the superblock buf */
1670 1672  
1671 1673          (void) VOP_PUTPAGE(common_specvp(bvp), (offset_t)0, (size_t)0,
1672 1674              B_INVAL, cr, NULL);
1673 1675          (void) VOP_CLOSE(bvp, flag, 1, (offset_t)0, cr, NULL);
1674 1676          bflush(dev);
1675 1677          (void) bfinval(dev, 1);
1676 1678          VN_RELE(bvp);
1677 1679  
1678 1680          /*
1679 1681           * It is now safe to NULL out the ufsvfs pointer and discard
1680 1682           * the root inode.
1681 1683           */
1682 1684          rip->i_ufsvfs = NULL;
1683 1685          VN_RELE(ITOV(rip));
1684 1686  
1685 1687          /* free up lockfs comment structure, if any */
1686 1688          if (ulp->ul_lockfs.lf_comlen && ulp->ul_lockfs.lf_comment)
1687 1689                  kmem_free(ulp->ul_lockfs.lf_comment, ulp->ul_lockfs.lf_comlen);
1688 1690  
1689 1691          /*
1690 1692           * Remove from instance list.
1691 1693           */
1692 1694          ufs_vfs_remove(ufsvfsp);
1693 1695  
1694 1696          /*
1695 1697           * For a forcible unmount, threads may be asleep in
1696 1698           * ufs_lockfs_begin/ufs_check_lockfs.  These threads will need
1697 1699           * the ufsvfs structure so we don't free it, yet.  ufs_update
1698 1700           * will free it up after a while.
1699 1701           */
1700 1702          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1701 1703                  extern kmutex_t         ufsvfs_mutex;
1702 1704                  extern struct ufsvfs    *ufsvfslist;
1703 1705  
1704 1706                  mutex_enter(&ufsvfs_mutex);
1705 1707                  ufsvfsp->vfs_dontblock = 1;
1706 1708                  ufsvfsp->vfs_next = ufsvfslist;
1707 1709                  ufsvfslist = ufsvfsp;
1708 1710                  mutex_exit(&ufsvfs_mutex);
1709 1711                  /* wakeup any suspended threads */
1710 1712                  cv_broadcast(&ulp->ul_cv);
1711 1713                  mutex_exit(&ulp->ul_lock);
1712 1714          } else {
1713 1715                  mutex_destroy(&ufsvfsp->vfs_lock);
1714 1716                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1715 1717          }
1716 1718  
1717 1719          /*
1718 1720           * Now mark the filesystem as unmounted since we're done with it.
1719 1721           */
1720 1722          vfsp->vfs_flag |= VFS_UNMOUNTED;
1721 1723  
1722 1724          return (0);
1723 1725  out:
1724 1726          /* open the fs to new ops */
1725 1727          cv_broadcast(&ulp->ul_cv);
1726 1728          mutex_exit(&ulp->ul_lock);
1727 1729  
1728 1730          if (TRANS_ISTRANS(ufsvfsp)) {
1729 1731                  /* allow the delete thread to continue */
1730 1732                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1731 1733                  /* restart the reclaim thread */
1732 1734                  ufs_thread_start(&ufsvfsp->vfs_reclaim, ufs_thread_reclaim,
1733 1735                      vfsp);
1734 1736                  /* coordinate with global hlock thread */
1735 1737                  ufsvfsp->vfs_validfs = UT_MOUNTED;
1736 1738                  /* check for trans errors during umount */
1737 1739                  ufs_trans_onerror();
1738 1740  
1739 1741                  /*
1740 1742                   * if we have a separate /usr it will never unmount
1741 1743                   * when halting. In order to not re-read all the
1742 1744                   * cylinder group summary info on mounting after
1743 1745                   * reboot the logging of summary info is re-enabled
1744 1746                   * and the super block written out.
1745 1747                   */
1746 1748                  mountpoint = vfs_getmntpoint(vfsp);
1747 1749                  if ((fs->fs_si == FS_SI_OK) &&
1748 1750                      (strcmp("/usr", refstr_value(mountpoint)) == 0)) {
1749 1751                          ufsvfsp->vfs_nolog_si = 0;
1750 1752                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
1751 1753                  }
1752 1754                  refstr_rele(mountpoint);
1753 1755          }
1754 1756  
1755 1757          return (error);
1756 1758  }
1757 1759  
1758 1760  static int
1759 1761  ufs_root(struct vfs *vfsp, struct vnode **vpp)
1760 1762  {
1761 1763          struct ufsvfs *ufsvfsp;
1762 1764          struct vnode *vp;
1763 1765  
1764 1766          if (!vfsp)
1765 1767                  return (EIO);
1766 1768  
1767 1769          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1768 1770          if (!ufsvfsp || !ufsvfsp->vfs_root)
1769 1771                  return (EIO);   /* forced unmount */
1770 1772  
1771 1773          vp = ufsvfsp->vfs_root;
1772 1774          VN_HOLD(vp);
1773 1775          *vpp = vp;
1774 1776          return (0);
1775 1777  }
1776 1778  
1777 1779  /*
1778 1780   * Get file system statistics.
1779 1781   */
1780 1782  static int
1781 1783  ufs_statvfs(struct vfs *vfsp, struct statvfs64 *sp)
1782 1784  {
1783 1785          struct fs *fsp;
1784 1786          struct ufsvfs *ufsvfsp;
1785 1787          int blk, i;
1786 1788          long max_avail, used;
1787 1789          dev32_t d32;
1788 1790  
1789 1791          if (vfsp->vfs_flag & VFS_UNMOUNTED)
1790 1792                  return (EIO);
1791 1793  
1792 1794          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1793 1795          fsp = ufsvfsp->vfs_fs;
1794 1796          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC))
1795 1797                  return (EINVAL);
1796 1798          if (fsp->fs_magic == FS_MAGIC &&
1797 1799              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
1798 1800              fsp->fs_version != UFS_VERSION_MIN))
1799 1801                  return (EINVAL);
1800 1802          if (fsp->fs_magic == MTB_UFS_MAGIC &&
1801 1803              (fsp->fs_version > MTB_UFS_VERSION_1 ||
1802 1804              fsp->fs_version < MTB_UFS_VERSION_MIN))
1803 1805                  return (EINVAL);
1804 1806  
1805 1807          /*
1806 1808           * get the basic numbers
1807 1809           */
1808 1810          (void) bzero(sp, sizeof (*sp));
1809 1811  
1810 1812          sp->f_bsize = fsp->fs_bsize;
1811 1813          sp->f_frsize = fsp->fs_fsize;
1812 1814          sp->f_blocks = (fsblkcnt64_t)fsp->fs_dsize;
1813 1815          sp->f_bfree = (fsblkcnt64_t)fsp->fs_cstotal.cs_nbfree * fsp->fs_frag +
1814 1816              fsp->fs_cstotal.cs_nffree;
1815 1817  
1816 1818          sp->f_files = (fsfilcnt64_t)fsp->fs_ncg * fsp->fs_ipg;
1817 1819          sp->f_ffree = (fsfilcnt64_t)fsp->fs_cstotal.cs_nifree;
1818 1820  
1819 1821          /*
1820 1822           * Adjust the numbers based on things waiting to be deleted.
1821 1823           * This modifies f_bfree and f_ffree.  Afterwards, everything we
1822 1824           * come up with will be self-consistent.  By definition, this
1823 1825           * is a point-in-time snapshot, so the fact that the delete
1824 1826           * thread's probably already invalidated the results is not a
1825 1827           * problem.  Note that if the delete thread is ever extended to
1826 1828           * non-logging ufs, this adjustment must always be made.
1827 1829           */
1828 1830          if (TRANS_ISTRANS(ufsvfsp))
1829 1831                  ufs_delete_adjust_stats(ufsvfsp, sp);
1830 1832  
1831 1833          /*
1832 1834           * avail = MAX(max_avail - used, 0)
1833 1835           */
1834 1836          max_avail = fsp->fs_dsize - ufsvfsp->vfs_minfrags;
1835 1837  
1836 1838          used = (fsp->fs_dsize - sp->f_bfree);
1837 1839  
1838 1840          if (max_avail > used)
1839 1841                  sp->f_bavail = (fsblkcnt64_t)max_avail - used;
1840 1842          else
1841 1843                  sp->f_bavail = (fsblkcnt64_t)0;
1842 1844  
1843 1845          sp->f_favail = sp->f_ffree;
1844 1846          (void) cmpldev(&d32, vfsp->vfs_dev);
1845 1847          sp->f_fsid = d32;
1846 1848          (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
1847 1849          sp->f_flag = vf_to_stf(vfsp->vfs_flag);
1848 1850  
1849 1851          /* keep coordinated with ufs_l_pathconf() */
1850 1852          sp->f_namemax = MAXNAMLEN;
1851 1853  
1852 1854          if (fsp->fs_cpc == 0) {
1853 1855                  bzero(sp->f_fstr, 14);
1854 1856                  return (0);
1855 1857          }
1856 1858          blk = fsp->fs_spc * fsp->fs_cpc / NSPF(fsp);
1857 1859          for (i = 0; i < blk; i += fsp->fs_frag) /* CSTYLED */
1858 1860                  /* void */;
1859 1861          i -= fsp->fs_frag;
1860 1862          blk = i / fsp->fs_frag;
1861 1863          bcopy(&(fs_rotbl(fsp)[blk]), sp->f_fstr, 14);
1862 1864          return (0);
1863 1865  }
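
f_bavail above is the space an unprivileged user can still allocate: the data area minus the reserved minfree fragments, minus what is already in use, clamped at zero. A plain arithmetic sketch with invented sample numbers, not part of the patch; avail_blocks is a made-up name:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t
    avail_blocks(long dsize, long minfrags, long bfree)
    {
            long max_avail = dsize - minfrags;      /* usable by non-root */
            long used = dsize - bfree;              /* already allocated  */

            return (max_avail > used ? (uint64_t)(max_avail - used) : 0);
    }

    int
    main(void)
    {
            /* 1000 data frags, 100 reserved, 250 free -> 150 available */
            printf("%llu\n", (unsigned long long)avail_blocks(1000, 100, 250));
            /* only reserve-area frags left -> 0 available */
            printf("%llu\n", (unsigned long long)avail_blocks(1000, 100, 50));
            return (0);
    }
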
1864 1866  
1865 1867  /*
1866 1868   * Flush any pending I/O to file system vfsp.
1867 1869   * The ufs_update() routine will only flush *all* ufs files.
1868 1870   * If vfsp is non-NULL, only sync this ufs (in preparation
1869 1871   * for a umount).
1870 1872   */
1871 1873  /*ARGSUSED*/
1872 1874  static int
1873 1875  ufs_sync(struct vfs *vfsp, short flag, struct cred *cr)
1874 1876  {
1875 1877          struct ufsvfs *ufsvfsp;
1876 1878          struct fs *fs;
1877 1879          int cheap = flag & SYNC_ATTR;
1878 1880          int error;
1879 1881  
1880 1882          /*
1881 1883           * SYNC_CLOSE means we're rebooting.  Toss everything
1882 1884           * on the idle queue so we don't have to slog through
1883 1885           * a bunch of uninteresting inodes over and over again.
1884 1886           */
1885 1887          if (flag & SYNC_CLOSE)
1886 1888                  ufs_idle_drain(NULL);
1887 1889  
1888 1890          if (vfsp == NULL) {
1889 1891                  ufs_update(flag);
1890 1892                  return (0);
1891 1893          }
1892 1894  
1893 1895          /* Flush a single ufs */
1894 1896          if (!vfs_matchops(vfsp, ufs_vfsops) || vfs_lock(vfsp) != 0)
1895 1897                  return (0);
1896 1898  
1897 1899          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1898 1900          if (!ufsvfsp)
1899 1901                  return (EIO);
1900 1902          fs = ufsvfsp->vfs_fs;
1901 1903          mutex_enter(&ufsvfsp->vfs_lock);
1902 1904  
1903 1905          if (ufsvfsp->vfs_dio &&
1904 1906              fs->fs_ronly == 0 &&
1905 1907              fs->fs_clean != FSBAD &&
1906 1908              fs->fs_clean != FSLOG) {
1907 1909                  /* turn off fast-io on unmount, so no fsck needed (4029401) */
1908 1910                  ufsvfsp->vfs_dio = 0;
1909 1911                  fs->fs_clean = FSACTIVE;
1910 1912                  fs->fs_fmod = 1;
1911 1913          }
1912 1914  
1913 1915          /* Write back modified superblock */
1914 1916          if (fs->fs_fmod == 0) {
1915 1917                  mutex_exit(&ufsvfsp->vfs_lock);
1916 1918          } else {
1917 1919                  if (fs->fs_ronly != 0) {
1918 1920                          mutex_exit(&ufsvfsp->vfs_lock);
1919 1921                          vfs_unlock(vfsp);
1920 1922                          return (ufs_fault(ufsvfsp->vfs_root,
1921 1923                              "fs = %s update: ro fs mod\n", fs->fs_fsmnt));
1922 1924                  }
1923 1925                  fs->fs_fmod = 0;
1924 1926                  mutex_exit(&ufsvfsp->vfs_lock);
1925 1927  
1926 1928                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UPDATE);
1927 1929          }
1928 1930          vfs_unlock(vfsp);
1929 1931  
1930 1932          /*
1931 1933           * Avoid racing with ufs_update() and ufs_unmount().
1932 1934           *
1933 1935           */
1934 1936          mutex_enter(&ufs_scan_lock);
1935 1937  
1936 1938          (void) ufs_scan_inodes(1, ufs_sync_inode,
1937 1939              (void *)(uintptr_t)cheap, ufsvfsp);
1938 1940  
1939 1941          mutex_exit(&ufs_scan_lock);
1940 1942  
1941 1943          bflush((dev_t)vfsp->vfs_dev);
1942 1944  
1943 1945          /*
1944 1946           * commit any outstanding async transactions
1945 1947           */
1946 1948          curthread->t_flag |= T_DONTBLOCK;
1947 1949          TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
1948 1950          if (!error) {
1949 1951                  TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
1950 1952                      TOP_COMMIT_SIZE);
1951 1953          }
1952 1954          curthread->t_flag &= ~T_DONTBLOCK;
1953 1955  
1954 1956          return (0);
1955 1957  }
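
ufs_sync only rewrites the superblock when its modified flag is set, and a modified superblock on a read-only mount is treated as a fault rather than written back. A compact user-space sketch of that decision, not part of the patch; struct demo_sb and sync_superblock are invented:

    #include <stdio.h>

    struct demo_sb {
            int fmod;               /* superblock dirty           */
            int ronly;              /* filesystem mounted r/o     */
    };

    static int
    sync_superblock(struct demo_sb *sb)
    {
            if (sb->fmod == 0)
                    return (0);     /* nothing to write           */
            if (sb->ronly)
                    return (-1);    /* "ro fs mod" fault analogue */
            sb->fmod = 0;
            printf("superblock written\n");
            return (0);
    }

    int
    main(void)
    {
            struct demo_sb clean = { 0, 0 }, dirty = { 1, 0 }, bad = { 1, 1 };

            printf("%d\n", sync_superblock(&clean));        /* 0          */
            printf("%d\n", sync_superblock(&dirty));        /* 0, writes  */
            printf("%d\n", sync_superblock(&bad));          /* -1         */
            return (0);
    }
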
1956 1958  
1957 1959  
1958 1960  void
1959 1961  sbupdate(struct vfs *vfsp)
1960 1962  {
1961 1963          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1962 1964          struct fs *fs = ufsvfsp->vfs_fs;
1963 1965          struct buf *bp;
1964 1966          int blks;
1965 1967          caddr_t space;
1966 1968          int i;
1967 1969          size_t size;
1968 1970  
1969 1971          /*
1970 1972           * for ulockfs processing, limit the superblock writes
1971 1973           */
1972 1974          if ((ufsvfsp->vfs_ulockfs.ul_sbowner) &&
1973 1975              (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) {
1974 1976                  /* process later */
1975 1977                  fs->fs_fmod = 1;
1976 1978                  return;
1977 1979          }
1978 1980          ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
1979 1981  
1980 1982          if (TRANS_ISTRANS(ufsvfsp)) {
1981 1983                  mutex_enter(&ufsvfsp->vfs_lock);
1982 1984                  ufs_sbwrite(ufsvfsp);
1983 1985                  mutex_exit(&ufsvfsp->vfs_lock);
1984 1986                  return;
1985 1987          }
1986 1988  
1987 1989          blks = howmany(fs->fs_cssize, fs->fs_fsize);
1988 1990          space = (caddr_t)fs->fs_u.fs_csp;
1989 1991          for (i = 0; i < blks; i += fs->fs_frag) {
1990 1992                  size = fs->fs_bsize;
1991 1993                  if (i + fs->fs_frag > blks)
1992 1994                          size = (blks - i) * fs->fs_fsize;
1993 1995                  bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev,
1994 1996                      (daddr_t)(fsbtodb(fs, fs->fs_csaddr + i)),
1995 1997                      fs->fs_bsize);
1996 1998                  bcopy(space, bp->b_un.b_addr, size);
1997 1999                  space += size;
1998 2000                  bp->b_bcount = size;
1999 2001                  UFS_BRWRITE(ufsvfsp, bp);
2000 2002          }
2001 2003          mutex_enter(&ufsvfsp->vfs_lock);
2002 2004          ufs_sbwrite(ufsvfsp);
2003 2005          mutex_exit(&ufsvfsp->vfs_lock);
2004 2006  }
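
sbupdate pushes the cylinder-group summary area out in filesystem-block-sized pieces and trims the final piece to the remaining fragments. A user-space analogue using memcpy in place of block I/O, not part of the patch; the chunk sizes and names are invented:

    #include <stdio.h>
    #include <string.h>

    #define FRAGS_PER_BLOCK 8
    #define FRAG_SIZE       1024

    static void
    copy_chunked(const char *src, char *dst, int total_frags)
    {
            int i;
            size_t size;

            for (i = 0; i < total_frags; i += FRAGS_PER_BLOCK) {
                    size = (size_t)FRAGS_PER_BLOCK * FRAG_SIZE;
                    if (i + FRAGS_PER_BLOCK > total_frags)
                            size = (size_t)(total_frags - i) * FRAG_SIZE;
                    memcpy(dst + (size_t)i * FRAG_SIZE,
                        src + (size_t)i * FRAG_SIZE, size);
            }
    }

    int
    main(void)
    {
            static char src[20 * FRAG_SIZE], dst[20 * FRAG_SIZE];

            memset(src, 'x', sizeof (src));
            copy_chunked(src, dst, 20);     /* copied as 8 + 8 + 4 frags */
            printf("match: %d\n", memcmp(src, dst, sizeof (src)) == 0);
            return (0);
    }
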
2005 2007  
2006 2008  int ufs_vget_idle_count = 2;    /* Number of inodes to idle each time */
2007 2009  static int
2008 2010  ufs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
2009 2011  {
2010 2012          int error = 0;
2011 2013          struct ufid *ufid;
2012 2014          struct inode *ip;
2013 2015          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
2014 2016          struct ulockfs *ulp;
2015 2017  
2016 2018          /*
2017 2019           * Check for unmounted filesystem.
2018 2020           */
2019 2021          if (vfsp->vfs_flag & VFS_UNMOUNTED) {
2020 2022                  error = EIO;
2021 2023                  goto errout;
2022 2024          }
2023 2025  
2024 2026          /*
2025 2027           * Keep the idle queue from getting too long by
2026 2028           * idling an inode before attempting to allocate another.
2027 2029           *    This operation must be performed before entering
2028 2030           *    lockfs or a transaction.
2029 2031           */
2030 2032          if (ufs_idle_q.uq_ne > ufs_idle_q.uq_hiwat)
2031 2033                  if ((curthread->t_flag & T_DONTBLOCK) == 0) {
2032 2034                          ins.in_vidles.value.ul += ufs_vget_idle_count;
2033 2035                          ufs_idle_some(ufs_vget_idle_count);
2034 2036                  }
2035 2037  
2036 2038          ufid = (struct ufid *)fidp;
2037 2039  
2038 2040          if (error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_VGET_MASK))
2039 2041                  goto errout;
2040 2042  
2041 2043          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
2042 2044  
2043 2045          error = ufs_iget(vfsp, ufid->ufid_ino, &ip, CRED());
2044 2046  
2045 2047          rw_exit(&ufsvfsp->vfs_dqrwlock);
2046 2048  
2047 2049          ufs_lockfs_end(ulp);
2048 2050  
2049 2051          if (error)
2050 2052                  goto errout;
2051 2053  
2052 2054          /*
2053 2055           * Check if the inode has been deleted or freed or is in transient state
2054 2056           * since the last VFS_VGET() request for it, release it and don't return
2055 2057           * it to the caller, presumably NFS, as it's no longer valid.
2056 2058           */
2057 2059          if (ip->i_gen != ufid->ufid_gen || ip->i_mode == 0 ||
2058 2060              (ip->i_nlink <= 0)) {
2059 2061                  VN_RELE(ITOV(ip));
2060 2062                  error = EINVAL;
2061 2063                  goto errout;
2062 2064          }
2063 2065  
2064 2066          *vpp = ITOV(ip);
2065 2067          return (0);
2066 2068  
2067 2069  errout:
2068 2070          *vpp = NULL;
2069 2071          return (error);
2070 2072  }
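
The generation check near the end of ufs_vget is what invalidates stale NFS file handles: a handle carries the inode number and the generation it was issued with, and an inode that has since been freed and reused no longer matches. A standalone sketch of that test, not part of the patch; the demo_* types are invented:

    #include <stdio.h>

    struct demo_inode {
            unsigned long gen;      /* bumped each time the slot is reused */
            unsigned int mode;      /* 0 means freed                       */
            int nlink;
    };

    struct demo_fid {
            unsigned long ino;
            unsigned long gen;      /* generation at handle-issue time     */
    };

    static int
    fid_is_valid(const struct demo_inode *ip, const struct demo_fid *fp)
    {
            if (ip->gen != fp->gen)
                    return (0);     /* inode slot was reused  */
            if (ip->mode == 0 || ip->nlink <= 0)
                    return (0);     /* deleted or being freed */
            return (1);
    }

    int
    main(void)
    {
            struct demo_inode ino = { 7, 0100644, 1 };
            struct demo_fid ok = { 42, 7 }, stale = { 42, 6 };

            printf("%d %d\n", fid_is_valid(&ino, &ok),
                fid_is_valid(&ino, &stale));        /* 1 0 */
            return (0);
    }
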
2071 2073  
2072 2074  static int
2073 2075  ufsinit(int fstype, char *name)
2074 2076  {
2075 2077          static const fs_operation_def_t ufs_vfsops_template[] = {
2076 2078                  VFSNAME_MOUNT,          { .vfs_mount = ufs_mount },
2077 2079                  VFSNAME_UNMOUNT,        { .vfs_unmount = ufs_unmount },
2078 2080                  VFSNAME_ROOT,           { .vfs_root = ufs_root },
2079 2081                  VFSNAME_STATVFS,        { .vfs_statvfs = ufs_statvfs },
2080 2082                  VFSNAME_SYNC,           { .vfs_sync = ufs_sync },
2081 2083                  VFSNAME_VGET,           { .vfs_vget = ufs_vget },
2082 2084                  VFSNAME_MOUNTROOT,      { .vfs_mountroot = ufs_mountroot },
2083 2085                  NULL,                   NULL
2084 2086          };
2085 2087          int error;
2086 2088  
2087 2089          ufsfstype = fstype;
2088 2090  
2089 2091          error = vfs_setfsops(fstype, ufs_vfsops_template, &ufs_vfsops);
2090 2092          if (error != 0) {
2091 2093                  cmn_err(CE_WARN, "ufsinit: bad vfs ops template");
2092 2094                  return (error);
2093 2095          }
2094 2096  
2095 2097          error = vn_make_ops(name, ufs_vnodeops_template, &ufs_vnodeops);
2096 2098          if (error != 0) {
2097 2099                  (void) vfs_freevfsops_by_type(fstype);
2098 2100                  cmn_err(CE_WARN, "ufsinit: bad vnode ops template");
2099 2101                  return (error);
2100 2102          }
2101 2103  
2102 2104          ufs_iinit();
2103 2105          return (0);
2104 2106  }
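
ufsinit registers the vfs operations first and the vnode operations second, and undoes the first registration if the second fails, so the module is never left half registered. A generic user-space mock-up of that unwind ordering, not part of the patch; every name in it is invented:

    #include <stdio.h>

    static int
    register_vfsops(int fail) { return (fail ? -1 : 0); }

    static int
    register_vnodeops(int fail) { return (fail ? -1 : 0); }

    static void
    unregister_vfsops(void) { printf("vfs ops rolled back\n"); }

    static int
    module_init(int fail_vfs, int fail_vnode)
    {
            if (register_vfsops(fail_vfs) != 0)
                    return (-1);            /* nothing to undo yet */
            if (register_vnodeops(fail_vnode) != 0) {
                    unregister_vfsops();    /* undo the first step */
                    return (-1);
            }
            return (0);                     /* fully registered    */
    }

    int
    main(void)
    {
            printf("%d\n", module_init(0, 0));      /* 0            */
            printf("%d\n", module_init(0, 1));      /* rollback, -1 */
            return (0);
    }
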
  