1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright (c) 2014 Integros [integros.com]
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 /* Portions Copyright 2010 Robert Milkowski */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/kmem.h>
35 #include <sys/pathname.h>
36 #include <sys/vnode.h>
37 #include <sys/vfs.h>
38 #include <sys/vfs_opreg.h>
39 #include <sys/mntent.h>
40 #include <sys/mount.h>
41 #include <sys/cmn_err.h>
42 #include "fs/fs_subr.h"
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_dir.h>
45 #include <sys/zil.h>
46 #include <sys/fs/zfs.h>
47 #include <sys/dmu.h>
48 #include <sys/dsl_dir.h>
49 #include <sys/dsl_prop.h>
50 #include <sys/dsl_dataset.h>
51 #include <sys/dsl_deleg.h>
52 #include <sys/spa.h>
53 #include <sys/zap.h>
54 #include <sys/sa.h>
55 #include <sys/sa_impl.h>
56 #include <sys/varargs.h>
57 #include <sys/policy.h>
58 #include <sys/atomic.h>
59 #include <sys/mkdev.h>
60 #include <sys/modctl.h>
61 #include <sys/refstr.h>
62 #include <sys/zfs_ioctl.h>
63 #include <sys/zfs_ctldir.h>
64 #include <sys/zfs_fuid.h>
65 #include <sys/bootconf.h>
66 #include <sys/sunddi.h>
67 #include <sys/dnlc.h>
68 #include <sys/dmu_objset.h>
69 #include <sys/spa_boot.h>
70 #include "zfs_comutil.h"
71
/* Filesystem type id assigned when the module registers with the VFS. */
int zfsfstype;
/* Operations vector built from zfs_vfsops_template below. */
vfsops_t *zfs_vfsops = NULL;
/* Device-number allocation state; protected by zfs_dev_mtx. */
static major_t zfs_major;
static minor_t zfs_minor;
static kmutex_t zfs_dev_mtx;

extern int sys_shutdown;

/* VFS entry points implemented in this file. */
static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
static void zfs_freevfs(vfs_t *vfsp);

/* Table mapping VFS operation names to the entry points above. */
static const fs_operation_def_t zfs_vfsops_template[] = {
	VFSNAME_MOUNT,		{ .vfs_mount = zfs_mount },
	VFSNAME_MOUNTROOT,	{ .vfs_mountroot = zfs_mountroot },
	VFSNAME_UNMOUNT,	{ .vfs_unmount = zfs_umount },
	VFSNAME_ROOT,		{ .vfs_root = zfs_root },
	VFSNAME_STATVFS,	{ .vfs_statvfs = zfs_statvfs },
	VFSNAME_SYNC,		{ .vfs_sync = zfs_sync },
	VFSNAME_VGET,		{ .vfs_vget = zfs_vget },
	VFSNAME_FREEVFS,	{ .vfs_freevfs = zfs_freevfs },
	NULL,			NULL
};
99
/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f
 */
static uint32_t zfs_active_fs_count = 0;

/* Cancellation lists: setting an option clears its opposite. */
static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };

/*
 * MO_DEFAULT is not used since the default value is determined
 * by the equivalent property.
 */
static mntopt_t mntopts[] = {
	{ MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
	{ MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
};

/* Mount-option table for this filesystem type. */
static mntopts_t zfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
127
128 /*ARGSUSED*/
129 int
130 zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
131 {
132 /*
133 * Data integrity is job one. We don't want a compromised kernel
134 * writing to the storage pool, so we never sync during panic.
135 */
136 if (panicstr)
137 return (0);
138
139 /*
140 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
141 * to sync metadata, which they would otherwise cache indefinitely.
142 * Semantically, the only requirement is that the sync be initiated.
143 * The DMU syncs out txgs frequently, so there's nothing to do.
144 */
145 if (flag & SYNC_ATTR)
146 return (0);
147
148 if (vfsp != NULL) {
149 /*
150 * Sync a specific filesystem.
151 */
152 zfsvfs_t *zfsvfs = vfsp->vfs_data;
153 dsl_pool_t *dp;
154
155 ZFS_ENTER(zfsvfs);
156 dp = dmu_objset_pool(zfsvfs->z_os);
157
158 /*
159 * If the system is shutting down, then skip any
160 * filesystems which may exist on a suspended pool.
161 */
162 if (sys_shutdown && spa_suspended(dp->dp_spa)) {
163 ZFS_EXIT(zfsvfs);
164 return (0);
165 }
166
167 if (zfsvfs->z_log != NULL)
168 zil_commit(zfsvfs->z_log, 0);
169
170 ZFS_EXIT(zfsvfs);
171 } else {
172 /*
173 * Sync all ZFS filesystems. This is what happens when you
174 * run sync(1M). Unlike other filesystems, ZFS honors the
175 * request by waiting for all pools to commit all dirty data.
176 */
177 spa_sync_allpools();
178 }
179
180 return (0);
181 }
182
/*
 * Allocate a unique (major, minor) device number for a new mount.
 *
 * Scans the minor-number space under zfs_dev_mtx, skipping minors that
 * already back a mounted filesystem.  If the current major's entire
 * minor space is exhausted, obtains a fresh major from getudev() and
 * restarts the scan.
 *
 * Returns 0 with *dev set on success, or -1 if getudev() could not
 * provide a new major number.
 */
static int
zfs_create_unique_device(dev_t *dev)
{
	major_t new_major;

	do {
		ASSERT3U(zfs_minor, <=, MAXMIN32);
		minor_t start = zfs_minor;	/* detects a full wrap-around */
		do {
			mutex_enter(&zfs_dev_mtx);
			if (zfs_minor >= MAXMIN32) {
				/*
				 * If we're still using the real major
				 * keep out of /dev/zfs and /dev/zvol minor
				 * number space.  If we're using a getudev()'ed
				 * major number, we can use all of its minors.
				 */
				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
					zfs_minor = ZFS_MIN_MINOR;
				else
					zfs_minor = 0;
			} else {
				zfs_minor++;
			}
			*dev = makedevice(zfs_major, zfs_minor);
			mutex_exit(&zfs_dev_mtx);
		} while (vfs_devismounted(*dev) && zfs_minor != start);
		if (zfs_minor == start) {
			/*
			 * We are using all ~262,000 minor numbers for the
			 * current major number.  Create a new major number.
			 */
			if ((new_major = getudev()) == (major_t)-1) {
				cmn_err(CE_WARN,
				    "zfs_mount: Can't get unique major "
				    "device number.");
				return (-1);
			}
			mutex_enter(&zfs_dev_mtx);
			zfs_major = new_major;
			zfs_minor = 0;

			mutex_exit(&zfs_dev_mtx);
		} else {
			break;
		}
		/* CONSTANTCONDITION */
	} while (1);

	return (0);
}
234
235 static void
236 atime_changed_cb(void *arg, uint64_t newval)
237 {
238 zfsvfs_t *zfsvfs = arg;
239
240 if (newval == TRUE) {
241 zfsvfs->z_atime = TRUE;
242 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
243 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
244 } else {
245 zfsvfs->z_atime = FALSE;
246 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
247 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
248 }
249 }
250
251 static void
252 xattr_changed_cb(void *arg, uint64_t newval)
253 {
254 zfsvfs_t *zfsvfs = arg;
255
256 if (newval == TRUE) {
257 /* XXX locking on vfs_flag? */
258 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
259 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
260 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
261 } else {
262 /* XXX locking on vfs_flag? */
263 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
264 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
265 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
266 }
267 }
268
269 static void
270 blksz_changed_cb(void *arg, uint64_t newval)
271 {
272 zfsvfs_t *zfsvfs = arg;
273 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
274 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
275 ASSERT(ISP2(newval));
276
277 zfsvfs->z_max_blksz = newval;
278 zfsvfs->z_vfs->vfs_bsize = newval;
279 }
280
281 static void
282 readonly_changed_cb(void *arg, uint64_t newval)
283 {
284 zfsvfs_t *zfsvfs = arg;
285
286 if (newval) {
287 /* XXX locking on vfs_flag? */
288 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
289 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
290 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
291 } else {
292 /* XXX locking on vfs_flag? */
293 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
294 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
295 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
296 }
297 }
298
299 static void
300 devices_changed_cb(void *arg, uint64_t newval)
301 {
302 zfsvfs_t *zfsvfs = arg;
303
304 if (newval == FALSE) {
305 zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
306 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
307 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
308 } else {
309 zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
310 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
311 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
312 }
313 }
314
315 static void
316 setuid_changed_cb(void *arg, uint64_t newval)
317 {
318 zfsvfs_t *zfsvfs = arg;
319
320 if (newval == FALSE) {
321 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
322 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
323 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
324 } else {
325 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
326 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
327 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
328 }
329 }
330
331 static void
332 exec_changed_cb(void *arg, uint64_t newval)
333 {
334 zfsvfs_t *zfsvfs = arg;
335
336 if (newval == FALSE) {
337 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
338 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
339 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
340 } else {
341 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
342 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
343 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
344 }
345 }
346
347 /*
348 * The nbmand mount option can be changed at mount time.
349 * We can't allow it to be toggled on live file systems or incorrect
350 * behavior may be seen from cifs clients
351 *
352 * This property isn't registered via dsl_prop_register(), but this callback
353 * will be called when a file system is first mounted
354 */
355 static void
356 nbmand_changed_cb(void *arg, uint64_t newval)
357 {
358 zfsvfs_t *zfsvfs = arg;
359 if (newval == FALSE) {
360 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
361 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
362 } else {
363 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
364 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
365 }
366 }
367
368 static void
369 snapdir_changed_cb(void *arg, uint64_t newval)
370 {
371 zfsvfs_t *zfsvfs = arg;
372
373 zfsvfs->z_show_ctldir = newval;
374 }
375
376 static void
377 vscan_changed_cb(void *arg, uint64_t newval)
378 {
379 zfsvfs_t *zfsvfs = arg;
380
381 zfsvfs->z_vscan = newval;
382 }
383
384 static void
385 acl_mode_changed_cb(void *arg, uint64_t newval)
386 {
387 zfsvfs_t *zfsvfs = arg;
388
389 zfsvfs->z_acl_mode = newval;
390 }
391
392 static void
393 acl_inherit_changed_cb(void *arg, uint64_t newval)
394 {
395 zfsvfs_t *zfsvfs = arg;
396
397 zfsvfs->z_acl_inherit = newval;
398 }
399
400 static void
401 rate_changed_cb(void *arg, uint64_t newval)
402 {
403 zfsvfs_t *zfsvfs = arg;
404
405 if (newval == UINT64_MAX)
406 newval = 0;
407 zfsvfs->z_rate.rate_cap = newval;
408 }
409
410 static int
411 zfs_register_callbacks(vfs_t *vfsp)
412 {
413 struct dsl_dataset *ds = NULL;
414 objset_t *os = NULL;
415 zfsvfs_t *zfsvfs = NULL;
416 uint64_t nbmand;
417 boolean_t readonly = B_FALSE;
418 boolean_t do_readonly = B_FALSE;
419 boolean_t setuid = B_FALSE;
420 boolean_t do_setuid = B_FALSE;
421 boolean_t exec = B_FALSE;
422 boolean_t do_exec = B_FALSE;
423 boolean_t devices = B_FALSE;
424 boolean_t do_devices = B_FALSE;
425 boolean_t xattr = B_FALSE;
426 boolean_t do_xattr = B_FALSE;
427 boolean_t atime = B_FALSE;
428 boolean_t do_atime = B_FALSE;
429 int error = 0;
430
431 ASSERT(vfsp);
432 zfsvfs = vfsp->vfs_data;
433 ASSERT(zfsvfs);
434 os = zfsvfs->z_os;
435
436 /*
437 * The act of registering our callbacks will destroy any mount
438 * options we may have. In order to enable temporary overrides
439 * of mount options, we stash away the current values and
440 * restore them after we register the callbacks.
441 */
442 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
443 !spa_writeable(dmu_objset_spa(os))) {
444 readonly = B_TRUE;
445 do_readonly = B_TRUE;
446 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
447 readonly = B_FALSE;
448 do_readonly = B_TRUE;
449 }
450 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
451 devices = B_FALSE;
452 setuid = B_FALSE;
453 do_devices = B_TRUE;
454 do_setuid = B_TRUE;
455 } else {
456 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
457 devices = B_FALSE;
458 do_devices = B_TRUE;
459 } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
460 devices = B_TRUE;
461 do_devices = B_TRUE;
462 }
463
464 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
465 setuid = B_FALSE;
466 do_setuid = B_TRUE;
467 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
468 setuid = B_TRUE;
469 do_setuid = B_TRUE;
470 }
471 }
472 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
473 exec = B_FALSE;
474 do_exec = B_TRUE;
475 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
476 exec = B_TRUE;
477 do_exec = B_TRUE;
478 }
479 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
480 xattr = B_FALSE;
481 do_xattr = B_TRUE;
482 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
483 xattr = B_TRUE;
484 do_xattr = B_TRUE;
485 }
486 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
487 atime = B_FALSE;
488 do_atime = B_TRUE;
489 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
490 atime = B_TRUE;
491 do_atime = B_TRUE;
492 }
493
494 /*
495 * nbmand is a special property. It can only be changed at
496 * mount time.
497 *
498 * This is weird, but it is documented to only be changeable
499 * at mount time.
500 */
501 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
502 nbmand = B_FALSE;
503 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
504 nbmand = B_TRUE;
505 } else {
506 char osname[ZFS_MAX_DATASET_NAME_LEN];
507
508 dmu_objset_name(os, osname);
509 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
510 NULL)) {
511 return (error);
512 }
513 }
514
515 /*
516 * Register property callbacks.
517 *
518 * It would probably be fine to just check for i/o error from
519 * the first prop_register(), but I guess I like to go
520 * overboard...
521 */
522 ds = dmu_objset_ds(os);
523 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
524 error = dsl_prop_register(ds,
525 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
526 error = error ? error : dsl_prop_register(ds,
527 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
528 error = error ? error : dsl_prop_register(ds,
529 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
530 error = error ? error : dsl_prop_register(ds,
531 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
532 error = error ? error : dsl_prop_register(ds,
533 zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
534 error = error ? error : dsl_prop_register(ds,
535 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
536 error = error ? error : dsl_prop_register(ds,
537 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
538 error = error ? error : dsl_prop_register(ds,
539 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
540 error = error ? error : dsl_prop_register(ds,
541 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
542 error = error ? error : dsl_prop_register(ds,
543 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
544 zfsvfs);
545 error = error ? error : dsl_prop_register(ds,
546 zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
547 error = error ? error : dsl_prop_register(ds,
548 zfs_prop_to_name(ZFS_PROP_RATE_LIMIT), rate_changed_cb, zfsvfs);
549
550 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
551 if (error)
552 goto unregister;
553
554 /*
555 * Invoke our callbacks to restore temporary mount options.
556 */
557 if (do_readonly)
558 readonly_changed_cb(zfsvfs, readonly);
559 if (do_setuid)
560 setuid_changed_cb(zfsvfs, setuid);
561 if (do_exec)
562 exec_changed_cb(zfsvfs, exec);
563 if (do_devices)
564 devices_changed_cb(zfsvfs, devices);
565 if (do_xattr)
566 xattr_changed_cb(zfsvfs, xattr);
567 if (do_atime)
568 atime_changed_cb(zfsvfs, atime);
569
570 nbmand_changed_cb(zfsvfs, nbmand);
571
572 return (0);
573
574 unregister:
575 dsl_prop_unregister_all(ds, zfsvfs);
576 return (error);
577 }
578
/*
 * DMU callback: given an object's bonus buffer, extract the user and
 * group ids that space accounting should be charged to.
 *
 * Returns ENOENT for untracked bonus types, EEXIST when data is NULL
 * (telling the dmu to keep the previously known ids), and 0 with
 * *userp / *groupp filled in otherwise.
 */
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
    uint64_t *userp, uint64_t *groupp)
{
	/*
	 * Is it a valid type of object to track?
	 */
	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
		return (SET_ERROR(ENOENT));

	/*
	 * If we have a NULL data pointer
	 * then assume the id's aren't changing and
	 * return EEXIST to the dmu to let it know to
	 * use the same ids
	 */
	if (data == NULL)
		return (SET_ERROR(EEXIST));

	if (bonustype == DMU_OT_ZNODE) {
		/* DMU_OT_ZNODE bonus: ids live directly in znode_phys_t. */
		znode_phys_t *znp = data;
		*userp = znp->zp_uid;
		*groupp = znp->zp_gid;
	} else {
		int hdrsize;
		sa_hdr_phys_t *sap = data;
		sa_hdr_phys_t sa = *sap;	/* local copy for byteswap */
		boolean_t swap = B_FALSE;

		ASSERT(bonustype == DMU_OT_SA);

		if (sa.sa_magic == 0) {
			/*
			 * This should only happen for newly created
			 * files that haven't had the znode data filled
			 * in yet.
			 */
			*userp = 0;
			*groupp = 0;
			return (0);
		}
		if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
			/* Bonus buffer written in the opposite byte order. */
			sa.sa_magic = SA_MAGIC;
			sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
			swap = B_TRUE;
		} else {
			VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
		}

		hdrsize = sa_hdrsize(&sa);
		VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
		/* uid/gid sit at fixed offsets past the SA header. */
		*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
		    SA_UID_OFFSET));
		*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
		    SA_GID_OFFSET));
		if (swap) {
			*userp = BSWAP_64(*userp);
			*groupp = BSWAP_64(*groupp);
		}
	}
	return (0);
}
641
642 static void
643 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
644 char *domainbuf, int buflen, uid_t *ridp)
645 {
646 uint64_t fuid;
647 const char *domain;
648
649 fuid = zfs_strtonum(fuidstr, NULL);
650
651 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
652 if (domain)
653 (void) strlcpy(domainbuf, domain, buflen);
654 else
655 domainbuf[0] = '\0';
656 *ridp = FUID_RID(fuid);
657 }
658
659 static uint64_t
660 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
661 {
662 switch (type) {
663 case ZFS_PROP_USERUSED:
664 return (DMU_USERUSED_OBJECT);
665 case ZFS_PROP_GROUPUSED:
666 return (DMU_GROUPUSED_OBJECT);
667 case ZFS_PROP_USERQUOTA:
668 return (zfsvfs->z_userquota_obj);
669 case ZFS_PROP_GROUPQUOTA:
670 return (zfsvfs->z_groupquota_obj);
671 }
672 return (0);
673 }
674
/*
 * Iterate the ZAP backing a userquota property, copying one
 * zfs_useracct_t per entry into vbuf until the buffer fills or the ZAP
 * is exhausted.
 *
 * *cookiep is a resumable zap cursor position, updated on return.
 * *bufsizep holds the buffer capacity on entry and the bytes written
 * on exit.  Returns ENOTSUP when userspace accounting is not present.
 */
int
zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
{
	int error;
	zap_cursor_t zc;
	zap_attribute_t za;
	zfs_useracct_t *buf = vbuf;
	uint64_t obj;

	if (!dmu_objset_userspace_present(zfsvfs->z_os))
		return (SET_ERROR(ENOTSUP));

	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
	if (obj == 0) {
		/* No backing object: report an empty result set. */
		*bufsizep = 0;
		return (0);
	}

	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		/* Stop before another record would overflow the buffer. */
		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
		    *bufsizep)
			break;

		/* za_name is a fuid string; decode into domain + rid. */
		fuidstr_to_sid(zfsvfs, za.za_name,
		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);

		buf->zu_space = za.za_first_integer;
		buf++;
	}
	if (error == ENOENT)
		error = 0;	/* end of ZAP, not a failure */

	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
	*cookiep = zap_cursor_serialize(&zc);
	zap_cursor_fini(&zc);
	return (error);
}
716
717 /*
718 * buf must be big enough (eg, 32 bytes)
719 */
720 static int
721 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
722 char *buf, boolean_t addok)
723 {
724 uint64_t fuid;
725 int domainid = 0;
726
727 if (domain && domain[0]) {
728 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
729 if (domainid == -1)
730 return (SET_ERROR(ENOENT));
731 }
732 fuid = FUID_ENCODE(domainid, rid);
733 (void) sprintf(buf, "%llx", (longlong_t)fuid);
734 return (0);
735 }
736
737 int
738 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
739 const char *domain, uint64_t rid, uint64_t *valp)
740 {
741 char buf[32];
742 int err;
743 uint64_t obj;
744
745 *valp = 0;
746
747 if (!dmu_objset_userspace_present(zfsvfs->z_os))
748 return (SET_ERROR(ENOTSUP));
749
750 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
751 if (obj == 0)
752 return (0);
753
754 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
755 if (err)
756 return (err);
757
758 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
759 if (err == ENOENT)
760 err = 0;
761 return (err);
762 }
763
/*
 * Set (or, when quota == 0, remove) a user/group quota entry.
 *
 * The quota ZAP object is created on first use and linked under the
 * master node.  The update runs in its own transaction; dirty FUID
 * table state is synced in the same tx.
 */
int
zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
    const char *domain, uint64_t rid, uint64_t quota)
{
	char buf[32];
	int err;
	dmu_tx_t *tx;
	uint64_t *objp;
	boolean_t fuid_dirtied;

	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
		return (SET_ERROR(EINVAL));

	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
		return (SET_ERROR(ENOTSUP));

	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
	    &zfsvfs->z_groupquota_obj;

	/* addok: the domain may need to be added to the FUID table. */
	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
	if (err)
		return (err);
	fuid_dirtied = zfsvfs->z_fuid_dirty;

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
	if (*objp == 0) {
		/* Also need to link the new quota object by name. */
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    zfs_userquota_prop_prefixes[type]);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	/* Create the quota object on first use, serialized by z_lock. */
	mutex_enter(&zfsvfs->z_lock);
	if (*objp == 0) {
		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
		    DMU_OT_NONE, 0, tx);
		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
	}
	mutex_exit(&zfsvfs->z_lock);

	if (quota == 0) {
		/* quota == 0 means "no quota": remove any existing entry. */
		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
		if (err == ENOENT)
			err = 0;
	} else {
		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
	}
	ASSERT(err == 0);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);
	dmu_tx_commit(tx);
	return (err);
}
824
825 boolean_t
826 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
827 {
828 char buf[32];
829 uint64_t used, quota, usedobj, quotaobj;
830 int err;
831
832 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
833 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
834
835 if (quotaobj == 0 || zfsvfs->z_replay)
836 return (B_FALSE);
837
838 (void) sprintf(buf, "%llx", (longlong_t)fuid);
839 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
840 if (err != 0)
841 return (B_FALSE);
842
843 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
844 if (err != 0)
845 return (B_FALSE);
846 return (used >= quota);
847 }
848
849 boolean_t
850 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
851 {
852 uint64_t fuid;
853 uint64_t quotaobj;
854
855 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
856
857 fuid = isgroup ? zp->z_gid : zp->z_uid;
858
859 if (quotaobj == 0 || zfsvfs->z_replay)
860 return (B_FALSE);
861
862 return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
863 }
864
/*
 * Associate this zfsvfs with the given objset, which must be owned.
 * This will cache a bunch of on-disk state from the objset in the
 * zfsvfs: the ZPL version and normalization properties, the SA
 * attribute table, and the object numbers of the root directory,
 * unlinked set, quota objects, FUID table, and shares directory.
 * Optional objects that are absent (ENOENT) are recorded as 0 rather
 * than failing; any other error is returned.
 */
static int
zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;
	uint64_t val;

	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
	zfsvfs->z_os = os;

	/* The dataset's ZPL version must be supported by this pool. */
	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
	if (error != 0)
		return (error);
	if (zfsvfs->z_version >
	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
		(void) printf("Can't mount a version %lld file system "
		    "on a version %lld pool\n. Pool must be upgraded to mount "
		    "this file system.", (u_longlong_t)zfsvfs->z_version,
		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
		return (SET_ERROR(ENOTSUP));
	}
	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_norm = (int)val;

	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_utf8 = (val != 0);

	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_case = (uint_t)val;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
	    zfsvfs->z_case == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);

	uint64_t sa_obj = 0;
	if (zfsvfs->z_use_sa) {
		/* should either have both of these objects or none */
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
		    &sa_obj);
		if (error != 0)
			return (error);
	}

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs->z_attr_table);
	if (error != 0)
		return (error);

	if (zfsvfs->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(os, zfs_sa_upgrade);

	/* Root directory and unlinked set are mandatory. */
	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
	    &zfsvfs->z_root);
	if (error != 0)
		return (error);
	ASSERT(zfsvfs->z_root != 0);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
	    &zfsvfs->z_unlinkedobj);
	if (error != 0)
		return (error);

	/* The remaining objects are optional; ENOENT reads as 0. */
	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
	    8, 1, &zfsvfs->z_userquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
	    8, 1, &zfsvfs->z_groupquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
	    &zfsvfs->z_fuid_obj);
	if (error == ENOENT)
		zfsvfs->z_fuid_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
	    &zfsvfs->z_shares_dir);
	if (error == ENOENT)
		zfsvfs->z_shares_dir = 0;
	else if (error != 0)
		return (error);

	return (0);
}
977
978 int
979 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
980 {
981 objset_t *os;
982 zfsvfs_t *zfsvfs;
983 int error;
984
985 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
986
987 /*
988 * We claim to always be readonly so we can open snapshots;
989 * other ZPL code will prevent us from writing to snapshots.
990 */
991
992 error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
993 if (error != 0) {
994 kmem_free(zfsvfs, sizeof (zfsvfs_t));
995 return (error);
996 }
997
998 error = zfsvfs_create_impl(zfvp, zfsvfs, os);
999 if (error != 0) {
1000 dmu_objset_disown(os, zfsvfs);
1001 }
1002 return (error);
1003 }
1004
1005
1006 int
1007 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
1008 {
1009 int error;
1010 int size = spa_get_obj_mtx_sz(dmu_objset_spa(os));
1011
1012 zfsvfs->z_vfs = NULL;
1013 zfsvfs->z_parent = zfsvfs;
1014
1015 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1016 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
1017 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1018 offsetof(znode_t, z_link_node));
1019 rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
1020 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
1021 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
1022 zfsvfs->z_hold_mtx_sz = size;
1023 zfsvfs->z_hold_mtx = kmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
1024 for (int i = 0; i != size; i++)
1025 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1026 mutex_init(&zfsvfs->z_drain_lock, NULL, MUTEX_DEFAULT, NULL);
1027 cv_init(&zfsvfs->z_drain_cv, NULL, CV_DEFAULT, NULL);
1028
1029 error = zfsvfs_init(zfsvfs, os);
1030 if (error != 0) {
1031 *zfvp = NULL;
1032 kmem_free(zfsvfs->z_hold_mtx, sizeof (kmutex_t) * size);
1033 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1034 return (error);
1035 }
1036
1037 *zfvp = zfsvfs;
1038 return (0);
1039 }
1040
/*
 * Finish setting up a zfsvfs once its vfs is attached: register the
 * property callbacks, open the ZIL and, when mounting, drain the
 * unlinked set and replay (or destroy) the intent log before exposing
 * the objset's user pointer.
 *
 * mounting is B_FALSE for an online recv re-setup, in which case log
 * replay is skipped (see comment below).
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int error;

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly)
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		else {
			zfs_unlinked_drain(zfsvfs);
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain(). (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.) This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg. This would write a "create
		 * object N" record to the intent log. Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk. So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				/* Replay disabled: discard the log. */
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				/* z_replay gates replay-only code paths. */
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
			}
		}

		/* restore readonly bit */
		if (readonly)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}
1123
/*
 * Free a zfsvfs_t and all synchronization primitives embedded in it.
 * The caller must already have torn down the filesystem (no znodes may
 * remain) and disowned or never owned the objset.
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;
	extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */

	/*
	 * This is a barrier to prevent the filesystem from going away in
	 * zfs_znode_move() until we can safely ensure that the filesystem is
	 * not unmounted. We consider the filesystem valid before the barrier
	 * and invalid after the barrier.
	 */
	rw_enter(&zfsvfs_lock, RW_READER);
	rw_exit(&zfsvfs_lock);

	/* No znodes may still be in the process of being freed. */
	VERIFY0(zfsvfs->z_znodes_freeing_cnt);

	zfs_fuid_destroy(zfsvfs);

	/* Tear down locks, condvars, and lists, then the hold-mutex array. */
	cv_destroy(&zfsvfs->z_drain_cv);
	mutex_destroy(&zfsvfs->z_drain_lock);
	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	list_destroy(&zfsvfs->z_all_znodes);
	rrm_destroy(&zfsvfs->z_teardown_lock);
	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != zfsvfs->z_hold_mtx_sz; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);

	kmem_free(zfsvfs->z_hold_mtx,
	    sizeof (kmutex_t) * zfsvfs->z_hold_mtx_sz);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}
1158
1159 static void
1160 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1161 {
1162 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1163 if (zfsvfs->z_vfs) {
1164 if (zfsvfs->z_use_fuids) {
1165 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1166 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1167 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1168 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1169 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1170 vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1171 } else {
1172 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1173 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1174 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1175 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1176 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1177 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1178 }
1179 }
1180 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1181 }
1182
/*
 * Do the real work of mounting dataset 'osname' on 'vfsp': create the
 * zfsvfs_t, fill in the generic vfs fields (device number, fsid, block
 * size, features), and finish setup -- the abbreviated read-only path
 * for snapshots, or zfsvfs_setup() for regular filesystems.  On error
 * the objset is disowned and the zfsvfs freed before returning.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	dev_t mount_dev;
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;
	char worminfo[13] = {0};	/* buffer for the "nms:worm" property */

	ASSERT(vfsp);
	ASSERT(osname);

	error = zfsvfs_create(osname, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	/* Initialize the generic filesystem structure. */
	vfsp->vfs_bcount = 0;
	vfsp->vfs_data = NULL;

	/* Allocate a device number no other mount is using. */
	if (zfs_create_unique_device(&mount_dev) == -1) {
		error = SET_ERROR(ENODEV);
		goto out;
	}
	ASSERT(vfs_devismounted(mount_dev) == 0);

	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
	    NULL))
		goto out;

	/*
	 * NOTE(review): "nms:worm" appears to be a vendor (Nexenta) WORM
	 * property; any value other than unset, "0", "off", or "-" marks
	 * the filesystem as WORM -- confirm against the z_isworm consumers.
	 */
	if (dsl_prop_get(osname, "nms:worm", 1, 12, &worminfo, NULL) == 0 &&
	    worminfo[0] && strcmp(worminfo, "0") != 0 &&
	    strcmp(worminfo, "off") != 0 && strcmp(worminfo, "-") != 0) {
		zfsvfs->z_isworm = B_TRUE;
	} else {
		zfsvfs->z_isworm = B_FALSE;
	}

	vfsp->vfs_dev = mount_dev;
	vfsp->vfs_fstype = zfsfstype;
	vfsp->vfs_bsize = recordsize;
	vfsp->vfs_flag |= VFS_NOTRUNC;
	vfsp->vfs_data = zfsvfs;

	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID. The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
	vfsp->vfs_fsid.val[0] = fsid_guid;
	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
	    zfsfstype & 0xFF;

	/*
	 * Set features for file system.
	 */
	zfs_set_fuid_feature(zfsvfs);
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
	}
	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);

	/*
	 * Snapshots mount read-only with atime off and sync disabled,
	 * skipping the full zfsvfs_setup() path.
	 */
	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t pval;

		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		error = zfsvfs_setup(zfsvfs, B_TRUE);
	}

	/* Regular filesystems get a '.zfs' control directory. */
	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	if (error) {
		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
		zfsvfs_free(zfsvfs);
	} else {
		atomic_inc_32(&zfs_active_fs_count);
	}

	return (error);
}
1286
1287 void
1288 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1289 {
1290 objset_t *os = zfsvfs->z_os;
1291
1292 if (!dmu_objset_is_snapshot(os))
1293 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1294 }
1295
1296 /*
1297 * Convert a decimal digit string to a uint64_t integer.
1298 */
1299 static int
1300 str_to_uint64(char *str, uint64_t *objnum)
1301 {
1302 uint64_t num = 0;
1303
1304 while (*str) {
1305 if (*str < '0' || *str > '9')
1306 return (SET_ERROR(EINVAL));
1307
1308 num = num*10 + *str++ - '0';
1309 }
1310
1311 *objnum = num;
1312 return (0);
1313 }
1314
1315 /*
1316 * The boot path passed from the boot loader is in the form of
1317 * "rootpool-name/root-filesystem-object-number'. Convert this
1318 * string to a dataset name: "rootpool-name/root-filesystem-name".
1319 */
1320 static int
1321 zfs_parse_bootfs(char *bpath, char *outpath)
1322 {
1323 char *slashp;
1324 uint64_t objnum;
1325 int error;
1326
1327 if (*bpath == 0 || *bpath == '/')
1328 return (SET_ERROR(EINVAL));
1329
1330 (void) strcpy(outpath, bpath);
1331
1332 slashp = strchr(bpath, '/');
1333
1334 /* if no '/', just return the pool name */
1335 if (slashp == NULL) {
1336 return (0);
1337 }
1338
1339 /* if not a number, just return the root dataset name */
1340 if (str_to_uint64(slashp+1, &objnum)) {
1341 return (0);
1342 }
1343
1344 *slashp = '\0';
1345 error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
1346 *slashp = '/';
1347
1348 return (error);
1349 }
1350
1351 /*
1352 * Check that the hex label string is appropriate for the dataset being
1353 * mounted into the global_zone proper.
1354 *
1355 * Return an error if the hex label string is not default or
1356 * admin_low/admin_high. For admin_low labels, the corresponding
1357 * dataset must be readonly.
1358 */
1359 int
1360 zfs_check_global_label(const char *dsname, const char *hexsl)
1361 {
1362 if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1363 return (0);
1364 if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1365 return (0);
1366 if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1367 /* must be readonly */
1368 uint64_t rdonly;
1369
1370 if (dsl_prop_get_integer(dsname,
1371 zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1372 return (SET_ERROR(EACCES));
1373 return (rdonly ? 0 : EACCES);
1374 }
1375 return (SET_ERROR(EACCES));
1376 }
1377
1378 /*
1379 * Determine whether the mount is allowed according to MAC check.
1380 * by comparing (where appropriate) label of the dataset against
1381 * the label of the zone being mounted into. If the dataset has
1382 * no label, create one.
1383 *
1384 * Returns 0 if access allowed, error otherwise (e.g. EACCES)
1385 */
static int
zfs_mount_label_policy(vfs_t *vfsp, char *osname)
{
	int error, retv;
	zone_t *mntzone = NULL;
	ts_label_t *mnt_tsl;
	bslabel_t *mnt_sl;
	bslabel_t ds_sl;
	char ds_hexsl[MAXNAMELEN];

	retv = EACCES;				/* assume the worst */

	/*
	 * Start by getting the dataset label if it exists.
	 */
	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
	if (error)
		return (SET_ERROR(EACCES));

	/*
	 * If labeling is NOT enabled, then disallow the mount of datasets
	 * which have a non-default label already. No other label checks
	 * are needed.
	 */
	if (!is_system_labeled()) {
		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
			return (0);
		return (SET_ERROR(EACCES));
	}

	/*
	 * Get the label of the mountpoint. If mounting into the global
	 * zone (i.e. mountpoint is not within an active zone and the
	 * zoned property is off), the label must be default or
	 * admin_low/admin_high only; no other checks are needed.
	 */
	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
	if (mntzone->zone_id == GLOBAL_ZONEID) {
		uint64_t zoned;

		zone_rele(mntzone);

		if (dsl_prop_get_integer(osname,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
			return (SET_ERROR(EACCES));
		if (!zoned)
			return (zfs_check_global_label(osname, ds_hexsl));
		else
			/*
			 * This is the case of a zone dataset being mounted
			 * initially, before the zone has been fully created;
			 * allow this mount into global zone.
			 */
			return (0);
	}

	/* Mounting into a non-global zone: compare against its label. */
	mnt_tsl = mntzone->zone_slabel;
	ASSERT(mnt_tsl != NULL);
	label_hold(mnt_tsl);
	mnt_sl = label2bslabel(mnt_tsl);

	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
		/*
		 * The dataset doesn't have a real label, so fabricate one.
		 */
		char *str = NULL;

		if (l_to_str_internal(mnt_sl, &str) == 0 &&
		    dsl_prop_set_string(osname,
		    zfs_prop_to_name(ZFS_PROP_MLSLABEL),
		    ZPROP_SRC_LOCAL, str) == 0)
			retv = 0;
		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
		/*
		 * Now compare labels to complete the MAC check. If the
		 * labels are equal then allow access. If the mountpoint
		 * label dominates the dataset label, allow readonly access.
		 * Otherwise, access is denied.
		 */
		if (blequal(mnt_sl, &ds_sl))
			retv = 0;
		else if (bldominates(mnt_sl, &ds_sl)) {
			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
			retv = 0;
		}
	}

	label_rele(mnt_tsl);
	zone_rele(mntzone);
	return (retv);
}
1480
/*
 * Handle the root filesystem: first mount at boot (why == ROOT_INIT),
 * read-write remount (ROOT_REMOUNT), and shutdown sync (ROOT_UNMOUNT).
 * The dataset to mount comes from the "zfs-bootfs" boot property.
 */
static int
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
{
	int error = 0;
	static int zfsrootdone = 0;
	zfsvfs_t *zfsvfs = NULL;
	znode_t *zp = NULL;
	vnode_t *vp = NULL;
	char *zfs_bootfs;
	char *zfs_devid;

	ASSERT(vfsp);

	/*
	 * The filesystem that we mount as root is defined in the
	 * boot property "zfs-bootfs" with a format of
	 * "poolname/root-dataset-objnum".
	 */
	if (why == ROOT_INIT) {
		/* The root filesystem is only ever mounted once. */
		if (zfsrootdone++)
			return (SET_ERROR(EBUSY));
		/*
		 * the process of doing a spa_load will require the
		 * clock to be set before we could (for example) do
		 * something better by looking at the timestamp on
		 * an uberblock, so just set it to -1.
		 */
		clkset(-1);

		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
			    "bootfs name");
			return (SET_ERROR(EINVAL));
		}
		zfs_devid = spa_get_bootprop("diskdevid");
		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
		if (zfs_devid)
			spa_free_bootprop(zfs_devid);
		if (error) {
			spa_free_bootprop(zfs_bootfs);
			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
			    error);
			return (error);
		}
		/* Translate "pool/objnum" into "pool/dataset-name". */
		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
			spa_free_bootprop(zfs_bootfs);
			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
			    error);
			return (error);
		}

		spa_free_bootprop(zfs_bootfs);

		if (error = vfs_lock(vfsp))
			return (error);

		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
			goto out;
		}

		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
		ASSERT(zfsvfs);
		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
			goto out;
		}

		/* Mark the root vnode and publish it as the system rootvp. */
		vp = ZTOV(zp);
		mutex_enter(&vp->v_lock);
		vp->v_flag |= VROOT;
		mutex_exit(&vp->v_lock);
		rootvp = vp;

		/*
		 * Leave rootvp held. The root file system is never unmounted.
		 */

		vfs_add((struct vnode *)0, vfsp,
		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
out:
		vfs_unlock(vfsp);
		return (error);
	} else if (why == ROOT_REMOUNT) {
		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
		vfsp->vfs_flag |= VFS_REMOUNT;

		/* refresh mount options */
		zfs_unregister_callbacks(vfsp->vfs_data);
		return (zfs_register_callbacks(vfsp));

	} else if (why == ROOT_UNMOUNT) {
		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
		(void) zfs_sync(vfsp, 0, 0);
		return (0);
	}

	/*
	 * if "why" is equal to anything else other than ROOT_INIT,
	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
	 */
	return (SET_ERROR(ENOTSUP));
}
1584
/*
 * VFS mount entry point.  Validates the mount point, checks caller
 * privilege (falling back to the dataset "mount" delegation plus
 * mount-point ownership when the caller lacks privilege), enforces
 * zone visibility and MAC label policy, then either refreshes options
 * for MS_REMOUNT or performs the real mount via zfs_domount().
 */
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char *osname;
	pathname_t spn;
	int error = 0;
	uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ?
	    UIO_SYSSPACE : UIO_USERSPACE;
	int canwrite;

	/* The mount point must be a directory. */
	if (mvp->v_type != VDIR)
		return (SET_ERROR(ENOTDIR));

	/*
	 * Unless this is a remount or an overlay mount, refuse a mount
	 * point that is in use (held) or is the root of another mount.
	 */
	mutex_enter(&mvp->v_lock);
	if ((uap->flags & MS_REMOUNT) == 0 &&
	    (uap->flags & MS_OVERLAY) == 0 &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (SET_ERROR(EBUSY));
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * ZFS does not support passing unparsed data in via MS_DATA.
	 * Users should use the MS_OPTIONSTR interface; this means
	 * that all option parsing is already done and the options struct
	 * can be interrogated.
	 */
	if ((uap->flags & MS_DATA) && uap->datalen > 0)
		return (SET_ERROR(EINVAL));

	/*
	 * Get the objset name (the "special" mount argument).
	 */
	if (error = pn_get(uap->spec, fromspace, &spn))
		return (error);

	osname = spn.pn_path;

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			/* On failure, 'error' keeps the secpolicy error. */
			if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
				goto out;
			}

			if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
				goto out;
			}
			secpolicy_fs_mount_clearopts(cr, vfsp);
		} else {
			goto out;
		}
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	error = zfs_mount_label_policy(vfsp, osname);
	if (error)
		goto out;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (uap->flags & MS_REMOUNT) {
		/* refresh mount options */
		zfs_unregister_callbacks(vfsp->vfs_data);
		error = zfs_register_callbacks(vfsp);
		goto out;
	}

	error = zfs_domount(vfsp, osname);

	/*
	 * Add an extra VFS_HOLD on our parent vfs so that it can't
	 * disappear due to a forced unmount.
	 */
	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
		VFS_HOLD(mvp->v_vfsp);

out:
	pn_free(&spn);
	return (error);
}
1695
/*
 * Fill in statvfs64 for this filesystem from the objset's space
 * accounting.  Sizes are reported in units of f_frsize (the minimum
 * ZFS block size).
 */
static int
zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	dev32_t d32;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
	statp->f_bsize = zfsvfs->z_max_blksz;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata. ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_favail = statp->f_ffree; /* no "root reservation" */
	statp->f_files = statp->f_ffree + usedobjs;

	(void) cmpldev(&d32, vfsp->vfs_dev);
	statp->f_fsid = d32;

	/*
	 * We're a zfs filesystem.
	 */
	(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);

	statp->f_flag = vf_to_stf(vfsp->vfs_flag);

	statp->f_namemax = MAXNAMELEN - 1;

	/*
	 * We have all of 32 characters to stuff a string here.
	 * Is there anything useful we could/should provide?
	 */
	bzero(statp->f_fstr, sizeof (statp->f_fstr));

	ZFS_EXIT(zfsvfs);
	return (0);
}
1759
1760 static int
1761 zfs_root(vfs_t *vfsp, vnode_t **vpp)
1762 {
1763 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1764 znode_t *rootzp;
1765 int error;
1766
1767 ZFS_ENTER(zfsvfs);
1768
1769 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1770 if (error == 0)
1771 *vpp = ZTOV(rootzp);
1772
1773 ZFS_EXIT(zfsvfs);
1774 return (error);
1775 }
1776
1777 /*
1778 * Teardown the zfsvfs::z_os.
1779 *
1780 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1781 * and 'z_teardown_inactive_lock' held.
1782 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t *zp;

	/*
	 * Stop the delete-queue drain thread, then block out all new VOPs
	 * by taking the teardown lock as writer.
	 */
	zfs_unlinked_drain_stop_wait(zfsvfs);
	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp. Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relavent for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_sa_hdl) {
			ASSERT(ZTOV(zp)->v_count > 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock. zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
	    !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	(void) dmu_objset_evict_dbufs(zfsvfs->z_os);

	return (0);
}
1873
/*
 * VFS unmount entry point.  Requires unmount privilege or the dataset
 * "mount" delegation, unmounts any snapshots under '.zfs', refuses a
 * non-forced unmount while user vnodes are still active, then tears
 * down the zfsvfs and releases the objset.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	int ret;

	/* Fall back to the "mount" delegation if we lack the privilege. */
	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp. Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL &&
	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		uint_t active_vnodes;

		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 *
		 * Active vnodes: vnodes that were held by an user
		 */

		active_vnodes =
		    vfsp->vfs_count - zfsvfs->z_znodes_freeing_cnt;

		if (zfsvfs->z_ctldir == NULL) {
			if (active_vnodes > 1)
				return (SET_ERROR(EBUSY));
		} else {
			if (active_vnodes > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (SET_ERROR(EBUSY));
		}
	}

	vfsp->vfs_flag |= VFS_UNMOUNTED;

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);

	return (0);
}
1966
1967 static int
1968 zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1969 {
1970 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1971 znode_t *zp;
1972 uint64_t object = 0;
1973 uint64_t fid_gen = 0;
1974 uint64_t gen_mask;
1975 uint64_t zp_gen;
1976 int i, err;
1977
1978 *vpp = NULL;
1979
1980 ZFS_ENTER(zfsvfs);
1981
1982 if (fidp->fid_len == LONG_FID_LEN) {
1983 zfid_long_t *zlfid = (zfid_long_t *)fidp;
1984 uint64_t objsetid = 0;
1985 uint64_t setgen = 0;
1986
1987 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1988 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1989
1990 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1991 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1992
1993 ZFS_EXIT(zfsvfs);
1994
1995 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1996 if (err)
1997 return (SET_ERROR(EINVAL));
1998 ZFS_ENTER(zfsvfs);
1999 }
2000
2001 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
2002 zfid_short_t *zfid = (zfid_short_t *)fidp;
2003
2004 for (i = 0; i < sizeof (zfid->zf_object); i++)
2005 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
2006
2007 for (i = 0; i < sizeof (zfid->zf_gen); i++)
2008 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
2009 } else {
2010 ZFS_EXIT(zfsvfs);
2011 return (SET_ERROR(EINVAL));
2012 }
2013
2014 /* A zero fid_gen means we are in the .zfs control directories */
2015 if (fid_gen == 0 &&
2016 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
2017 *vpp = zfsvfs->z_ctldir;
2018 ASSERT(*vpp != NULL);
2019 if (object == ZFSCTL_INO_SNAPDIR) {
2020 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
2021 0, NULL, NULL, NULL, NULL, NULL) == 0);
2022 } else {
2023 VN_HOLD(*vpp);
2024 }
2025 ZFS_EXIT(zfsvfs);
2026 return (0);
2027 }
2028
2029 gen_mask = -1ULL >> (64 - 8 * i);
2030
2031 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
2032 if (err = zfs_zget(zfsvfs, object, &zp)) {
2033 ZFS_EXIT(zfsvfs);
2034 return (err);
2035 }
2036 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
2037 sizeof (uint64_t));
2038 zp_gen = zp_gen & gen_mask;
2039 if (zp_gen == 0)
2040 zp_gen = 1;
2041 if (zp->z_unlinked || zp_gen != fid_gen) {
2042 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
2043 VN_RELE(ZTOV(zp));
2044 ZFS_EXIT(zfsvfs);
2045 return (SET_ERROR(EINVAL));
2046 }
2047
2048 *vpp = ZTOV(zp);
2049 ZFS_EXIT(zfsvfs);
2050 return (0);
2051 }
2052
2053 /*
2054 * Block out VOPs and close zfsvfs_t::z_os
2055 *
2056 * Note, if successful, then we return with the 'z_teardown_lock' and
2057 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying
2058 * dataset and objset intact so that they can be atomically handed off during
2059 * a subsequent rollback or recv operation and the resume thereafter.
2060 */
2061 int
2062 zfs_suspend_fs(zfsvfs_t *zfsvfs)
2063 {
2064 int error;
2065
2066 mutex_enter(&zfsvfs->z_lock);
2067 if (zfsvfs->z_busy) {
2068 mutex_exit(&zfsvfs->z_lock);
2069 return (SET_ERROR(EBUSY));
2070 }
2071 zfsvfs->z_busy = B_TRUE;
2072 mutex_exit(&zfsvfs->z_lock);
2073
2074 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) {
2075 mutex_enter(&zfsvfs->z_lock);
2076 zfsvfs->z_busy = B_FALSE;
2077 mutex_exit(&zfsvfs->z_lock);
2078 return (error);
2079 }
2080
2081 return (0);
2082 }
2083
2084 /*
2085 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset
2086 * is an invariant across any of the operations that can be performed while the
2087 * filesystem was suspended. Whether it succeeded or failed, the preconditions
2088 * are the same: the relevant objset and associated dataset are owned by
2089 * zfsvfs, held, and long held on entry.
2090 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	/* Both teardown locks must still be write-held from the suspend. */
	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	VERIFY0(dmu_objset_from_ds(ds, &os));

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs. If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	if (((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) &&
	    !zfsvfs->z_unmounted) {
		/*
		 * zfs_suspend_fs() could have interrupted freeing
		 * of dnodes. We need to restart this freeing so
		 * that we don't "leak" the space.
		 */
		zfs_unlinked_drain(zfsvfs);
	}

bail:
	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED());
	}
	/* Clear the busy flag set by zfs_suspend_fs(). */
	mutex_enter(&zfsvfs->z_lock);
	zfsvfs->z_busy = B_FALSE;
	mutex_exit(&zfsvfs->z_lock);

	return (err);
}
2159
2160 static void
2161 zfs_freevfs(vfs_t *vfsp)
2162 {
2163 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2164
2165 /*
2166 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2167 * from zfs_mount(). Release it here. If we came through
2168 * zfs_mountroot() instead, we didn't grab an extra hold, so
2169 * skip the VFS_RELE for rootvfs.
2170 */
2171 if (zfsvfs->z_issnap && (vfsp != rootvfs))
2172 VFS_RELE(zfsvfs->z_parent->z_vfs);
2173
2174 zfsvfs_free(zfsvfs);
2175
2176 atomic_dec_32(&zfs_active_fs_count);
2177 }
2178
2179 /*
2180 * VFS_INIT() initialization. Note that there is no VFS_FINI(),
2181 * so we can't safely do any non-idempotent initialization here.
2182 * Leave that to zfs_init() and zfs_fini(), which are called
2183 * from the module's _init() and _fini() entry points.
2184 */
/*ARGSUSED*/
static int
zfs_vfsinit(int fstype, char *name)
{
	int error;

	/* Remember the fstype assigned to us by the VFS framework. */
	zfsfstype = fstype;

	/*
	 * Setup vfsops and vnodeops tables.
	 */
	error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
	if (error != 0) {
		/*
		 * NOTE(review): we warn but continue initialization here,
		 * while the vnode-ops failure below unwinds with an error.
		 * Confirm the fall-through on a bad vfsops template is
		 * intentional.
		 */
		cmn_err(CE_WARN, "zfs: bad vfs ops template");
	}

	error = zfs_create_op_tables();
	if (error) {
		/* Undo any partial registration before failing. */
		zfs_remove_op_tables();
		cmn_err(CE_WARN, "zfs: bad vnode ops template");
		(void) vfs_freevfsops_by_type(zfsfstype);
		return (error);
	}

	mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Unique major number for all zfs mounts.
	 * If we run out of 32-bit minors, we'll getudev() another major.
	 */
	zfs_major = ddi_name_to_major(ZFS_DRIVER);
	zfs_minor = ZFS_MIN_MINOR;

	return (0);
}
2220
/*
 * Module-load initialization: set up the '.zfs' control directory
 * machinery and the znode/vnode-ops subsystem, then register the ZPL
 * objset type with the DMU using zfs_space_delta_cb().
 */
void
zfs_init(void)
{
	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
}
2236
void
zfs_fini(void)
{
	/* Tear down the .zfs control directory structures ... */
	zfsctl_fini();
	/* ... and the znode cache set up by zfs_init(). */
	zfs_znode_fini();
}
2243
2244 int
2245 zfs_busy(void)
2246 {
2247 return (zfs_active_fs_count != 0);
2248 }
2249
2250 int
2251 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2252 {
2253 int error;
2254 objset_t *os = zfsvfs->z_os;
2255 dmu_tx_t *tx;
2256
2257 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2258 return (SET_ERROR(EINVAL));
2259
2260 if (newvers < zfsvfs->z_version)
2261 return (SET_ERROR(EINVAL));
2262
2263 if (zfs_spa_version_map(newvers) >
2264 spa_version(dmu_objset_spa(zfsvfs->z_os)))
2265 return (SET_ERROR(ENOTSUP));
2266
2267 tx = dmu_tx_create(os);
2268 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2269 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2270 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2271 ZFS_SA_ATTRS);
2272 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2273 }
2274 error = dmu_tx_assign(tx, TXG_WAIT);
2275 if (error) {
2276 dmu_tx_abort(tx);
2277 return (error);
2278 }
2279
2280 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2281 8, 1, &newvers, tx);
2282
2283 if (error) {
2284 dmu_tx_commit(tx);
2285 return (error);
2286 }
2287
2288 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2289 uint64_t sa_obj;
2290
2291 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2292 SPA_VERSION_SA);
2293 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2294 DMU_OT_NONE, 0, tx);
2295
2296 error = zap_add(os, MASTER_NODE_OBJ,
2297 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2298 ASSERT0(error);
2299
2300 VERIFY(0 == sa_set_sa_object(os, sa_obj));
2301 sa_register_update_callback(os, zfs_sa_upgrade);
2302 }
2303
2304 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2305 "from %llu to %llu", zfsvfs->z_version, newvers);
2306
2307 dmu_tx_commit(tx);
2308
2309 zfsvfs->z_version = newvers;
2310 os->os_version = newvers;
2311
2312 zfs_set_fuid_feature(zfsvfs);
2313
2314 return (0);
2315 }
2316
2317 /*
2318 * Read a property stored within the master node.
2319 */
2320 int
2321 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2322 {
2323 uint64_t *cached_copy = NULL;
2324
2325 /*
2326 * Figure out where in the objset_t the cached copy would live, if it
2327 * is available for the requested property.
2328 */
2329 if (os != NULL) {
2330 switch (prop) {
2331 case ZFS_PROP_VERSION:
2332 cached_copy = &os->os_version;
2333 break;
2334 case ZFS_PROP_NORMALIZE:
2335 cached_copy = &os->os_normalization;
2336 break;
2337 case ZFS_PROP_UTF8ONLY:
2338 cached_copy = &os->os_utf8only;
2339 break;
2340 case ZFS_PROP_CASE:
2341 cached_copy = &os->os_casesensitivity;
2342 break;
2343 default:
2344 break;
2345 }
2346 }
2347 if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
2348 *value = *cached_copy;
2349 return (0);
2350 }
2351
2352 /*
2353 * If the property wasn't cached, look up the file system's value for
2354 * the property. For the version property, we look up a slightly
2355 * different string.
2356 */
2357 const char *pname;
2358 int error = ENOENT;
2359 if (prop == ZFS_PROP_VERSION) {
2360 pname = ZPL_VERSION_STR;
2361 } else {
2362 pname = zfs_prop_to_name(prop);
2363 }
2364
2365 if (os != NULL) {
2366 ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
2367 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2368 }
2369
2370 if (error == ENOENT) {
2371 /* No value set, use the default value */
2372 switch (prop) {
2373 case ZFS_PROP_VERSION:
2374 *value = ZPL_VERSION;
2375 break;
2376 case ZFS_PROP_NORMALIZE:
2377 case ZFS_PROP_UTF8ONLY:
2378 *value = 0;
2379 break;
2380 case ZFS_PROP_CASE:
2381 *value = ZFS_CASE_SENSITIVE;
2382 break;
2383 default:
2384 return (error);
2385 }
2386 error = 0;
2387 }
2388
2389 /*
2390 * If one of the methods for getting the property value above worked,
2391 * copy it into the objset_t's cache.
2392 */
2393 if (error == 0 && cached_copy != NULL) {
2394 *cached_copy = *value;
2395 }
2396
2397 return (error);
2398 }
2399
2400 /*
2401 * Return true if the coresponding vfs's unmounted flag is set.
2402 * Otherwise return false.
2403 * If this function returns true we know VFS unmount has been initiated.
2404 */
2405 boolean_t
2406 zfs_get_vfs_flag_unmounted(objset_t *os)
2407 {
2408 zfsvfs_t *zfvp;
2409 boolean_t unmounted = B_FALSE;
2410
2411 ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
2412
2413 mutex_enter(&os->os_user_ptr_lock);
2414 zfvp = dmu_objset_get_user(os);
2415 if (zfvp != NULL && zfvp->z_vfs != NULL &&
2416 (zfvp->z_vfs->vfs_flag & VFS_UNMOUNTED))
2417 unmounted = B_TRUE;
2418 mutex_exit(&os->os_user_ptr_lock);
2419
2420 return (unmounted);
2421 }
2422
/*
 * VFS switch table entry for ZFS; registered with the system through
 * the modlfs linkage below.  zfs_vfsinit() runs when the fstype is
 * installed.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	MNTTYPE_ZFS,		/* filesystem type name */
	zfs_vfsinit,		/* fstype init routine (defined above) */
	VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
	VSW_XID|VSW_ZMOUNT,	/* vfssw flags */
	&zfs_mntopts		/* mount option table */
};
2431
/*
 * Module linkage: ties the vfw vfsdef above into the loadable-module
 * framework via mod_fsops.
 */
struct modlfs zfs_modlfs = {
	&mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
};