/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 */

  16 #ifndef _SYS_WRCACHE_H
  17 #define _SYS_WRCACHE_H
  18 
  19 #include <sys/zfs_context.h>
  20 #include <sys/sysmacros.h>
  21 #include <sys/types.h>
  22 #include <sys/fs/zfs.h>
  23 #include <sys/spa.h>
  24 #include <sys/dmu.h>
  25 #include <sys/dmu_traverse.h>
  26 #include <sys/dsl_dataset.h>
  27 #include <sys/dsl_pool.h>
  28 
  29 #ifdef  __cplusplus
  30 extern "C" {
  31 #endif
  32 
/*
 * Layout of the 'blk_prop' field of wbc_block_t
 *
 * 64              48              32              16              0
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 * |       RESERVED        |D|comp |   PSIZE(16)   |   LSIZE(16)   |
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Legend:
 * D		deleted (1 bit)
 * comp		compression function of payload (7 bits)
 * PSIZE	size of payload after compression, in bytes
 * LSIZE	logical size of payload, in bytes
 *
 * A deleted block is a block whose DVA has been freed. Such a block
 * must not be used by wbc_move_logic; once the move has finished,
 * only the block structure itself needs to be freed.
 */
  51 #define WBCBP_GET_LSIZE(bp)     \
  52         BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
  53 #define WBCBP_SET_LSIZE(bp, x)  do { \
  54         BF64_SET_SB((bp)->blk_prop, \
  55             0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
  56 _NOTE(CONSTCOND) } while (0)
  57 
  58 #define WBCBP_GET_PSIZE(bp)     \
  59         BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
  60 #define WBCBP_SET_PSIZE(bp, x)  do { \
  61         BF64_SET_SB((bp)->blk_prop, \
  62             16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
  63 _NOTE(CONSTCOND) } while (0)
  64 
  65 #define WBCBP_GET_COMPRESS(bp)          BF64_GET((bp)->blk_prop, 32, 7)
  66 #define WBCBP_SET_COMPRESS(bp, x)       BF64_SET((bp)->blk_prop, 32, 7, x)
  67 
  68 #define WBCBP_IS_DELETED(wbcbp)         BF64_GET((wbcbp)->blk_prop, 39, 1)
  69 #define WBCBP_MARK_DELETED(wbcbp)       BF64_SET((wbcbp)->blk_prop, 39, 1, 1)
  70 
  71 typedef struct wbc_data wbc_data_t;
  72 
  73 /*
  74  * WBC Instance is a dataset (DS) and
  75  * all the children DSs of that DS.
  76  */
  77 typedef struct wbc_instance {
  78         avl_node_t      node;
  79 
  80         wbc_data_t      *wbc_data;
  81         void            *wbc_autosnap_hdl;
  82         char            ds_name[ZFS_MAX_DATASET_NAME_LEN];
  83 
  84         /* copy of dsl_dataset_t->ds_object */
  85         uint64_t        ds_object;
  86 
  87         /*
  88          * TXG of the right boundary WBC-window
  89          * can be 0 if this instance is 'idle'
  90          */
  91         uint64_t        txg_to_rele;
  92 
  93         /*
  94          * txg of the specific TXG sync that
  95          * executed 'off' on this instance
  96          */
  97         uint64_t        txg_off;
  98 
  99         boolean_t       fini_migration;
 100         boolean_t       fini_done;
 101 } wbc_instance_t;
 102 
 103 /*
 104  * WBC statistics
 105  */
 106 typedef struct wbc_stat {
 107         uint64_t        wbc_spa_util;           /* spa average utilization */
 108         clock_t         wbc_stat_lbolt;         /* last statistics update */
 109         boolean_t       wbc_stat_update;        /* statstics update flag */
 110 } wbc_stat_t;
 111 
 112 /*
 113  * wbc_data is a global per ZFS pool structure contains all
 114  * information associated with write cache and
 115  * is attached to spa structure.
 116  */
 117 struct wbc_data {
 118         kthread_t       *wbc_init_thread;
 119         kthread_t       *wbc_thread;            /* move thread */
 120         kthread_t       *wbc_walk_thread;       /* collector thread */
 121 
 122         kmutex_t        wbc_lock;
 123         kcondvar_t      wbc_cv;
 124 
 125         /* TASKQ that does async finalization of wbc_instances */
 126         taskq_t         *wbc_instance_fini;
 127 
 128         uint64_t        wbc_instance_fini_cnt;
 129 
 130         avl_tree_t      wbc_blocks;             /* collected blocks */
 131         avl_tree_t      wbc_moved_blocks;       /* moved blocks */
 132 
 133         uint64_t        wbc_window_bytes;       /* bytes in current wnd */
 134         uint64_t        wbc_altered_bytes;      /* bytes altered in new wnd */
 135         uint64_t        wbc_roll_threshold;     /* max percent can be altered */
 136         uint64_t        wbc_altered_limit;      /* max bytes can be altered */
 137 
 138         uint64_t        wbc_blocks_count;       /* amount of blocks */
 139 
 140         taskq_t         *wbc_move_taskq;        /* pending blocks taskq */
 141         uint64_t        wbc_move_threads;       /* taskq number of threads */
 142 
 143         uint64_t        wbc_start_txg;          /* left boundary */
 144         uint64_t        wbc_finish_txg;         /* right boundary */
 145         uint64_t        wbc_txg_to_rele;        /* txg to rele */
 146         uint64_t        wbc_blocks_in;          /* collected */
 147         uint64_t        wbc_blocks_out;         /* planned */
 148         uint64_t        wbc_blocks_mv;          /* moved */
 149         uint64_t        wbc_blocks_mv_last; /* latest number of moved blocks */
 150 
 151         uint64_t        wbc_latest_window_time;
 152 
 153         /* Tree of watched datasets and corresponding data */
 154         avl_tree_t      wbc_instances;
 155         boolean_t       wbc_ready_to_use;
 156 
 157         spa_t           *wbc_spa;               /* parent spa */
 158         wbc_stat_t      wbc_stat;               /* WBC statistics */
 159 
 160         uint64_t        wbc_fault_moves;        /* amount of fault moves */
 161 
 162         boolean_t       wbc_purge;      /* should purge queued blocks */
 163         boolean_t       wbc_walk;       /* should walk */
 164         boolean_t       wbc_locked;     /* do not walk while locked */
 165         boolean_t       wbc_walking;    /* currently walking */
 166         boolean_t       wbc_wait_for_window;
 167 
 168         boolean_t       wbc_delete;     /* delete state */
 169 
 170         boolean_t       wbc_thr_exit;   /* exit flag */
 171         boolean_t       wbc_isvalid;    /* WBC is inited */
 172         boolean_t       wbc_first_move; /* TRUE until the 1 WBC-win opened */
 173 };
 174 
 175 /* !!! Do not change these constants !!! */
 176 #define WBC_SPECIAL_DVA 0
 177 #define WBC_NORMAL_DVA 1
 178 
 179 /*
 180  * In-core representation of a block which will be moved
 181  */
 182 typedef struct wbc_block {
 183         avl_node_t      node;
 184 
 185         /* associated WBC */
 186         wbc_data_t      *data;
 187 
 188         /*
 189          * size, compression
 190          */
 191         uint64_t        blk_prop;
 192 
 193         /* birth txg for arc lookup */
 194         uint64_t        btxg;
 195 
 196         /* dvas of blocks to move */
 197         dva_t           dva[2];
 198 
 199         kmutex_t        lock;
 200 } wbc_block_t;
 201 
 202 typedef struct wbc_parseblock_cb {
 203         wbc_data_t      *wbc_data;
 204 
 205         /*
 206          * A bookmark for resume
 207          */
 208         zbookmark_phys_t        zb;
 209 
 210         /*
 211          * Total size of all collected blocks
 212          */
 213         uint64_t        bt_size;
 214 
 215         /*
 216          * The time we started traversal process
 217          */
 218         hrtime_t        start_time;
 219 
 220         uint64_t        actv_txg;
 221 } wbc_parseblock_cb_t;
 222 
 223 /*
 224  * This structure describes ZFS_PROP_WBC_MODE property
 225  */
 226 typedef struct {
 227         /*
 228          * copy of dsl_dataset_t->ds_object of dataset
 229          * for which user does wbc_mode=on
 230          */
 231         uint64_t        root_ds_object;
 232 
 233         /*
 234          * TXG when user did 'wbc_mode=off'
 235          */
 236         uint64_t        txg_off;
 237 
 238         /*
 239          * Flags. Now is not used.
 240          */
 241         uint64_t        flags;
 242 } wbc_mode_prop_val_t;
 243 
 244 #define WBC_MODE_PROP_VAL_SZ (sizeof (wbc_mode_prop_val_t) / sizeof (uint64_t))
 245 
 246 void wbc_init(wbc_data_t *wbc_data, spa_t *spa);
 247 void wbc_fini(wbc_data_t *wbc_data);
 248 
 249 void wbc_activate(spa_t *spa, boolean_t pool_creation);
 250 void wbc_deactivate(spa_t *spa);
 251 
 252 int wbc_select_dva(wbc_data_t *wbc_data, zio_t *zio);
 253 boolean_t wbc_bp_is_migrated(wbc_data_t *wbc_data, const blkptr_t *bp);
 254 int wbc_first_valid_dva(const blkptr_t *bp,
 255     wbc_data_t *wbc_data, boolean_t removal);
 256 
 257 wbc_data_t *spa_get_wbc_data(spa_t *spa);
 258 
 259 void wbc_add_bytes(spa_t *spa, uint64_t txg, uint64_t bytes);
 260 
 261 /*
 262  * write cache thread.
 263  */
 264 void wbc_start_thread(spa_t *);
 265 boolean_t wbc_stop_thread(spa_t *);
 266 void wbc_trigger_wbcthread(spa_t *, uint64_t);
 267 
 268 /*
 269  * callback function for traverse_dataset which validates
 270  * the block pointer and adds to the list.
 271  */
 272 blkptr_cb_t     wbc_traverse_ds_cb;
 273 
 274 boolean_t wbc_check_parseblocks_hold(spa_t *);
 275 void wbc_check_parseblocks_rele(spa_t *spa);
 276 boolean_t wbc_try_hold(spa_t *);
 277 int wbc_walk_lock(spa_t *);
 278 void wbc_walk_unlock(spa_t *);
 279 
 280 void wbc_process_objset(wbc_data_t *wbc_data, objset_t *os, boolean_t destroy);
 281 void wbc_mode_changed(void *arg, uint64_t newval);
 282 int wbc_check_dataset(const char *ds_name);
 283 
 284 boolean_t wbc_try_disable(wbc_data_t *wbc_data);
 285 
 286 #ifdef  __cplusplus
 287 }
 288 #endif
 289 
 290 #endif  /* _SYS_WRCACHE_H */