NEX-9200 Improve the scalability of attribute locking in zfs_zget
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9436 Rate limiting controls (was QoS) per ZFS dataset, updates from demo
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-8972 Async-delete side-effect that may cause unmount EBUSY
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-8852 Quality-of-Service (QoS) controls per NFS share
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-3762 Appliance crashes with a NULL pointer dereference during a zpool export when a zfs_vn_rele_taskq thread attempts to check a bogus rwlock from rw_write_held
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
OS-80 support for vdev and CoS properties for the new I/O scheduler
OS-95 lint warning introduced by OS-61
Fixup merge results
re #13204 rb4280 zfs receive/rollback deadlock
re #6815 rb1758 need WORM in nza-kernel (4.0)
    
      
          --- old/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h
          +++ new/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  23   24   */
  24   25  
  25   26  #ifndef _SYS_FS_ZFS_VFSOPS_H
  26   27  #define _SYS_FS_ZFS_VFSOPS_H
  27   28  
  28   29  #include <sys/isa_defs.h>
  29   30  #include <sys/types32.h>
  30   31  #include <sys/list.h>
  31   32  #include <sys/vfs.h>
  32   33  #include <sys/zil.h>
  33   34  #include <sys/sa.h>
  
  34   35  #include <sys/rrwlock.h>
  35   36  #include <sys/zfs_ioctl.h>
  36   37  
  37   38  #ifdef  __cplusplus
  38   39  extern "C" {
  39   40  #endif
  40   41  
  41   42  typedef struct zfsvfs zfsvfs_t;
  42   43  struct znode;
  43   44  
       45 +/*
       46 + * ZFS Quality of Service (QoS) I/O throttling state,
       47 + * per file system.  Limits the I/O rate in this FS.
       48 + * See "Token Bucket" on Wikipedia
       49 + */
       50 +typedef struct zfs_rate_state {
       51 +        uint64_t rate_cap;              /* zero means no cap */
       52 +        int64_t rate_token_bucket;      /* bytes I/O allowed without waiting */
       53 +        hrtime_t rate_last_update;
       54 +        kmutex_t rate_lock;
       55 +        kcondvar_t rate_wait_cv;
       56 +        int rate_waiters;
       57 +} zfs_rate_state_t;
       58 +
       59 +/*
       60 + * Status of the zfs_unlinked_drain thread.
       61 + */
       62 +typedef enum drain_state {
       63 +        ZFS_DRAIN_SHUTDOWN = 0,
       64 +        ZFS_DRAIN_RUNNING,
       65 +        ZFS_DRAIN_SHUTDOWN_REQ
       66 +} drain_state_t;
       67 +
  44   68  struct zfsvfs {
  45   69          vfs_t           *z_vfs;         /* generic fs struct */
  46   70          zfsvfs_t        *z_parent;      /* parent fs */
  47   71          objset_t        *z_os;          /* objset reference */
  48   72          uint64_t        z_root;         /* id of root znode */
  49   73          uint64_t        z_unlinkedobj;  /* id of unlinked zapobj */
  50   74          uint64_t        z_max_blksz;    /* maximum block size for files */
  51   75          uint64_t        z_fuid_obj;     /* fuid table object number */
  52   76          uint64_t        z_fuid_size;    /* fuid table size */
  53   77          avl_tree_t      z_fuid_idx;     /* fuid tree keyed by index */
  54   78          avl_tree_t      z_fuid_domain;  /* fuid tree keyed by domain */
  55   79          krwlock_t       z_fuid_lock;    /* fuid lock */
  56   80          boolean_t       z_fuid_loaded;  /* fuid tables are loaded */
  57   81          boolean_t       z_fuid_dirty;   /* need to sync fuid table ? */
  58   82          struct zfs_fuid_info    *z_fuid_replay; /* fuid info for replay */
  59   83          zilog_t         *z_log;         /* intent log pointer */
  60   84          uint_t          z_acl_mode;     /* acl chmod/mode behavior */
  
  61   85          uint_t          z_acl_inherit;  /* acl inheritance behavior */
  62   86          zfs_case_t      z_case;         /* case-sense */
  63   87          boolean_t       z_utf8;         /* utf8-only */
  64   88          int             z_norm;         /* normalization flags */
  65   89          boolean_t       z_atime;        /* enable atimes mount option */
  66   90          boolean_t       z_unmounted;    /* unmounted */
  67   91          rrmlock_t       z_teardown_lock;
  68   92          krwlock_t       z_teardown_inactive_lock;
  69   93          list_t          z_all_znodes;   /* all vnodes in the fs */
  70   94          kmutex_t        z_znodes_lock;  /* lock for z_all_znodes */
       95 +        uint_t          z_znodes_freeing_cnt; /* number of znodes to be freed */
  71   96          vnode_t         *z_ctldir;      /* .zfs directory pointer */
  72   97          boolean_t       z_show_ctldir;  /* expose .zfs in the root dir */
  73   98          boolean_t       z_issnap;       /* true if this is a snapshot */
  74   99          boolean_t       z_vscan;        /* virus scan on/off */
  75  100          boolean_t       z_use_fuids;    /* version allows fuids */
  76  101          boolean_t       z_replay;       /* set during ZIL replay */
  77  102          boolean_t       z_use_sa;       /* version allow system attributes */
  78  103          uint64_t        z_version;      /* ZPL version */
  79  104          uint64_t        z_shares_dir;   /* hidden shares dir */
  80  105          kmutex_t        z_lock;
  81  106          uint64_t        z_userquota_obj;
  82  107          uint64_t        z_groupquota_obj;
  83  108          uint64_t        z_replay_eof;   /* New end of file - replay only */
  84  109          sa_attr_type_t  *z_attr_table;  /* SA attr mapping->id */
  85      -#define ZFS_OBJ_MTX_SZ  64
  86      -        kmutex_t        z_hold_mtx[ZFS_OBJ_MTX_SZ];     /* znode hold locks */
      110 +        boolean_t       z_isworm;       /* true if this is a WORM FS */
      111 +        /* true if suspend-resume cycle is in progress */
      112 +        boolean_t       z_busy;
      113 +        int     z_hold_mtx_sz; /* the size of z_hold_mtx array */
      114 +        kmutex_t        *z_hold_mtx;    /* znode hold locks */
      115 +        /* for controlling async zfs_unlinked_drain */
      116 +        kmutex_t        z_drain_lock;
      117 +        kcondvar_t      z_drain_cv;
      118 +        drain_state_t   z_drain_state;
      119 +        zfs_rate_state_t z_rate;
  87  120  };
  88  121  
  89  122  /*
  90  123   * Normal filesystems (those not under .zfs/snapshot) have a total
  91  124   * file ID size limited to 12 bytes (including the length field) due to
  92  125   * NFSv2 protocol's limitation of 32 bytes for a filehandle.  For historical
  93  126   * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
  94  127   * (although the NFSv3 protocol actually permits a maximum of 64 bytes).  It
  95  128   * is not possible to expand beyond 12 bytes without abandoning support
  96  129   * of NFSv2.
  97  130   *
  98  131   * For normal filesystems, we partition up the available space as follows:
  99  132   *      2 bytes         fid length (required)
 100  133   *      6 bytes         object number (48 bits)
 101  134   *      4 bytes         generation number (32 bits)
 102  135   *
 103  136   * We reserve only 48 bits for the object number, as this is the limit
 104  137   * currently defined and imposed by the DMU.
 105  138   */
 106  139  typedef struct zfid_short {
 107  140          uint16_t        zf_len;
 108  141          uint8_t         zf_object[6];           /* obj[i] = obj >> (8 * i) */
 109  142          uint8_t         zf_gen[4];              /* gen[i] = gen >> (8 * i) */
 110  143  } zfid_short_t;
 111  144  
 112  145  /*
 113  146   * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
 114  147   * (including the length field).  This makes files under .zfs/snapshot
 115  148   * accessible by NFSv3 and NFSv4, but not NFSv2.
 116  149   *
 117  150   * For files under .zfs/snapshot, we partition up the available space
 118  151   * as follows:
 119  152   *      2 bytes         fid length (required)
 120  153   *      6 bytes         object number (48 bits)
 121  154   *      4 bytes         generation number (32 bits)
 122  155   *      6 bytes         objset id (48 bits)
 123  156   *      4 bytes         currently just zero (32 bits)
 124  157   *
 125  158   * We reserve only 48 bits for the object number and objset id, as these are
 126  159   * the limits currently defined and imposed by the DMU.
 127  160   */
 128  161  typedef struct zfid_long {
 129  162          zfid_short_t    z_fid;
 130  163          uint8_t         zf_setid[6];            /* obj[i] = obj >> (8 * i) */
 131  164          uint8_t         zf_setgen[4];           /* gen[i] = gen >> (8 * i) */
 132  165  } zfid_long_t;
 133  166  
 134  167  #define SHORT_FID_LEN   (sizeof (zfid_short_t) - sizeof (uint16_t))
 135  168  #define LONG_FID_LEN    (sizeof (zfid_long_t) - sizeof (uint16_t))
 136  169  
 137  170  extern uint_t zfs_fsyncer_key;
 138  171  
 139  172  extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
 140  173  extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
 141  174  extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 142  175      const char *domain, uint64_t rid, uint64_t *valuep);
 143  176  extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 144  177      uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
 145  178  extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 146  179      const char *domain, uint64_t rid, uint64_t quota);
 147  180  extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *,
 148  181      boolean_t isgroup);
 149  182  extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup,
 150  183      uint64_t fuid);
 151  184  extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
 152  185  extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp);
 153  186  extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
 154  187  extern void zfsvfs_free(zfsvfs_t *zfsvfs);
 155  188  extern int zfs_check_global_label(const char *dsname, const char *hexsl);
 156  189  
 157  190  #ifdef  __cplusplus
 158  191  }
 159  192  #endif
 160  193  
 161  194  #endif  /* _SYS_FS_ZFS_VFSOPS_H */
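
The zfs_rate_state_t added above holds the per-filesystem token-bucket state behind the
rate-limiting controls (NEX-9436, NEX-8852).  The code that actually charges the bucket lives
outside this header and is not part of this webrev; the fragment below is only a minimal
sketch of how a token bucket built on these fields can be refilled and charged.  The function
name zfs_rate_throttle and the refill arithmetic are assumptions for illustration, not the
shipped implementation.

          #include <sys/mutex.h>
          #include <sys/condvar.h>
          #include <sys/time.h>           /* gethrtime(), NANOSEC, MSEC2NSEC() */
          #include <sys/zfs_vfsops.h>     /* zfs_rate_state_t */

          /*
           * Illustration only: charge "bytes" of I/O against the per-fs token
           * bucket, sleeping until tokens are available.  rate_cap serves as
           * both the bytes-per-second limit and the bucket depth here.
           */
          static void
          zfs_rate_throttle(zfs_rate_state_t *rs, uint64_t bytes)
          {
                  if (rs->rate_cap == 0)
                          return;                 /* no cap configured */

                  mutex_enter(&rs->rate_lock);
                  for (;;) {
                          hrtime_t now = gethrtime();
                          int64_t refill = (int64_t)(rs->rate_cap *
                              (uint64_t)(now - rs->rate_last_update) / NANOSEC);

                          if (refill > 0) {
                                  /* Refill at rate_cap bytes/sec, clamped to one bucket. */
                                  rs->rate_token_bucket += refill;
                                  if (rs->rate_token_bucket > (int64_t)rs->rate_cap)
                                          rs->rate_token_bucket = (int64_t)rs->rate_cap;
                                  rs->rate_last_update = now;
                          }

                          if (rs->rate_token_bucket > 0) {
                                  /* Charge the bucket; large I/Os may drive it negative. */
                                  rs->rate_token_bucket -= (int64_t)bytes;
                                  break;
                          }

                          /* Out of tokens: count ourselves as a sleeper and wait a tick. */
                          rs->rate_waiters++;
                          (void) cv_timedwait_hires(&rs->rate_wait_cv, &rs->rate_lock,
                              MSEC2NSEC(10), MSEC2NSEC(1), 0);
                          rs->rate_waiters--;
                  }
                  mutex_exit(&rs->rate_lock);
          }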
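
drain_state_t and the z_drain_* members give the asynchronous zfs_unlinked_drain worker
(NEX-8972) a shutdown handshake, so that unmount or suspend does not race with a worker that
is still draining the unlinked set.  The following is a hypothetical sketch of the requesting
side, under the assumption that the worker periodically checks z_drain_state and acknowledges
by setting ZFS_DRAIN_SHUTDOWN; the function name is illustrative only.

          #include <sys/mutex.h>
          #include <sys/condvar.h>
          #include <sys/zfs_vfsops.h>     /* zfsvfs_t, drain_state_t */

          /*
           * Illustration only: ask a running unlinked-drain worker to stop and
           * wait until it acknowledges before proceeding with unmount/suspend.
           */
          static void
          zfsvfs_drain_stop(zfsvfs_t *zfsvfs)
          {
                  mutex_enter(&zfsvfs->z_drain_lock);
                  if (zfsvfs->z_drain_state == ZFS_DRAIN_RUNNING)
                          zfsvfs->z_drain_state = ZFS_DRAIN_SHUTDOWN_REQ;
                  while (zfsvfs->z_drain_state != ZFS_DRAIN_SHUTDOWN) {
                          cv_broadcast(&zfsvfs->z_drain_cv);      /* nudge the worker */
                          cv_wait(&zfsvfs->z_drain_cv, &zfsvfs->z_drain_lock);
                  }
                  mutex_exit(&zfsvfs->z_drain_lock);
          }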
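
Finally, the fixed ZFS_OBJ_MTX_SZ (64) array of znode hold locks is replaced by a pointer plus
z_hold_mtx_sz, so the number of lock buckets can be chosen at mount time rather than compiled
in (NEX-9200).  Callers such as zfs_zget then hash the object number over z_hold_mtx_sz
buckets.  The macro below is only a sketch of that pattern (the real lock macros live in
zfs_znode.h) and assumes a power-of-two array size.

          /*
           * Illustration only: pick the hold lock for an object by hashing the
           * object number over the run-time sized array (power-of-two size).
           */
          #define ZFS_OBJ_MUTEX_SKETCH(zfsvfs, obj_num) \
                  (&(zfsvfs)->z_hold_mtx[(obj_num) & ((zfsvfs)->z_hold_mtx_sz - 1)])

          /*
           * Typical use in zfs_zget()-style code:
           *
           *      mutex_enter(ZFS_OBJ_MUTEX_SKETCH(zfsvfs, obj_num));
           *      ... look up or create the znode for obj_num ...
           *      mutex_exit(ZFS_OBJ_MUTEX_SKETCH(zfsvfs, obj_num));
           */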
  