NEX-4794 Write Back Cache sync and async writes: adjust routing according to watermark limits
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
5269 zpool import slow
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

Old version:
 336         uint64_t        lr_domcnt;      /* number of unique domains */
 337         uint64_t        lr_fuidcnt;     /* number of real fuids */
 338         uint64_t        lr_acl_bytes;   /* number of bytes in ACL */
 339         uint64_t        lr_acl_flags;   /* ACL flags */
 340         /* lr_acl_bytes bytes of variable-sized ACEs follow */
 341 } lr_acl_t;
 342 
 343 /*
 344  * ZIL structure definitions, interface function prototypes and globals.
 345  */
 346 
 347 /*
 348  * Writes are handled in three different ways:
 349  *
 350  * WR_INDIRECT:
 351  *    In this mode, if we need to commit the write later, then the block
 352  *    is immediately written into the file system (using dmu_sync),
 353  *    and a pointer to the block is put into the log record.
 354  *    When the txg commits the block is linked in.
 355  *    This saves additionally writing the data into the log record.
 356  *    There are a few requirements for this to occur:
 357  *      - write is greater than zfs/zvol_immediate_write_sz
 358  *      - not using slogs (as slogs are assumed to always be faster
 359  *        than writing into the main pool)
 360  *      - the write occupies only one block
 361  * WR_COPIED:
 362  *    If we know we'll immediately be committing the
 363  *    transaction (FSYNC or FDSYNC), then we allocate a larger
 364  *    log record here for the data and copy the data in.
 365  * WR_NEED_COPY:
 366  *    Otherwise we don't allocate a buffer, and *if* we need to
 367  *    flush the write later then a buffer is allocated and
 368  *    we retrieve the data using the dmu.
 369  */
 370 typedef enum {
 371         WR_INDIRECT,    /* indirect - a large write (dmu_sync() data */
 372                         /* and put blkptr in log, rather than actual data) */
 373         WR_COPIED,      /* immediate - data is copied into lr_write_t */
 374         WR_NEED_COPY,   /* immediate - data needs to be copied if pushed */
 375         WR_NUM_STATES   /* number of states */
 376 } itx_wr_state_t;
 377 
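To make the three-way split concrete, the sketch below restates the selection
the comment describes. It is illustrative only: pick_wr_state() is a
hypothetical helper, the real decision is made by the zfs_log_write() and
zvol_log_write() callers, and the exact tunable consulted
(zfs_immediate_write_sz vs. zvol_immediate_write_sz) depends on the caller.

/*
 * Hypothetical sketch of the WR_INDIRECT criteria listed above; the
 * actual selection lives in zfs_log_write()/zvol_log_write().
 */
static itx_wr_state_t
pick_wr_state(zilog_t *zilog, uint64_t len, uint64_t blocksize,
    boolean_t sync)
{
        if (len > zvol_immediate_write_sz &&    /* large write */
            !spa_has_slogs(zilog->zl_spa) &&    /* no separate log device */
            len <= blocksize)                   /* fits in a single block */
                return (WR_INDIRECT);

        /* Otherwise the data travels inside the log record itself. */
        return (sync ? WR_COPIED : WR_NEED_COPY);
}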
 378 typedef struct itx {
 379         list_node_t     itx_node;       /* linkage on zl_itx_list */
 380         void            *itx_private;   /* type-specific opaque data */


 402 extern zilog_t  *zil_alloc(objset_t *os, zil_header_t *zh_phys);
 403 extern void     zil_free(zilog_t *zilog);
 404 
 405 extern zilog_t  *zil_open(objset_t *os, zil_get_data_t *get_data);
 406 extern void     zil_close(zilog_t *zilog);
 407 
 408 extern void     zil_replay(objset_t *os, void *arg,
 409     zil_replay_func_t *replay_func[TX_MAX_TYPE]);
 410 extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
 411 extern void     zil_destroy(zilog_t *zilog, boolean_t keep_first);
 412 extern void     zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
 413 extern void     zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
 414 
 415 extern itx_t    *zil_itx_create(uint64_t txtype, size_t lrsize);
 416 extern void     zil_itx_destroy(itx_t *itx);
 417 extern void     zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
 418 
 419 extern void     zil_commit(zilog_t *zilog, uint64_t oid);
 420 extern void     zil_commit_impl(zilog_t *zilog, uint64_t oid);
 421 
 422 extern int      zil_reset(const char *osname, void *txarg);
 423 extern int      zil_claim(struct dsl_pool *dp,
 424     struct dsl_dataset *ds, void *txarg);
 425 extern int      zil_check_log_chain(struct dsl_pool *dp,
 426     struct dsl_dataset *ds, void *tx);
 427 extern void     zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 428 extern void     zil_clean(zilog_t *zilog, uint64_t synced_txg);
 429 
 430 extern int      zil_suspend(const char *osname, void **cookiep);
 431 extern void     zil_resume(void *cookie);
 432 
 433 extern void     zil_lwb_add_block(struct lwb *lwb, const blkptr_t *bp);
 434 extern void     zil_lwb_add_txg(struct lwb *lwb, uint64_t txg);
 435 extern int      zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);
 436 
 437 extern void     zil_set_sync(zilog_t *zilog, uint64_t syncval);
 438 
 439 extern void     zil_set_logbias(zilog_t *zilog, uint64_t slogval);
 440 
 441 extern int zil_replay_disable;
 442 
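Taken together, the prototypes above define the consumer lifecycle: open a
log against an objset, log intent records as writes arrive, force them to
stable storage with zil_commit(), and close on teardown. A minimal
caller-side sketch, with error handling elided and my_get_data standing in
for a consumer-supplied zil_get_data_t callback:

/*
 * Illustrative lifecycle of the interface above.
 */
static void
example_zil_lifecycle(objset_t *os, zil_get_data_t *my_get_data, uint64_t oid)
{
        zilog_t *zilog = zil_open(os, my_get_data);

        /*
         * ... writes build itxs with zil_itx_create() and hand them
         * to the log with zil_itx_assign() ...
         */

        /* Block until everything logged against oid is on stable storage. */
        zil_commit(zilog, oid);

        zil_close(zilog);
}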

New version:
 336         uint64_t        lr_domcnt;      /* number of unique domains */
 337         uint64_t        lr_fuidcnt;     /* number of real fuids */
 338         uint64_t        lr_acl_bytes;   /* number of bytes in ACL */
 339         uint64_t        lr_acl_flags;   /* ACL flags */
 340         /* lr_acl_bytes bytes of variable-sized ACEs follow */
 341 } lr_acl_t;
 342 
 343 /*
 344  * ZIL structure definitions, interface function prototypes and globals.
 345  */
 346 
 347 /*
 348  * Writes are handled in three different ways:
 349  *
 350  * WR_INDIRECT:
 351  *    In this mode, if we need to commit the write later, then the block
 352  *    is immediately written into the file system (using dmu_sync),
 353  *    and a pointer to the block is put into the log record.
 354  *    When the txg commits the block is linked in.
 355  *    This saves additionally writing the data into the log record.
 356  *    There are a few requirements for this to occur. In general, WR_INDIRECT,
 357  *    with the subsequent dmu_sync() of the data directly into the filesystem,
 358  *    is used in the following two cases:
 359  *      - logbias = THROUGHPUT mode
 360  *      - writeback cache (via special vdev) when there is no separate slog
 361  *    For specific details and configurable tunables, see zfs_log_write().
 362  * WR_COPIED:
 363  *    If we know we'll immediately be committing the
 364  *    transaction (FSYNC or FDSYNC), then we allocate a larger
 365  *    log record here for the data and copy the data in.
 366  * WR_NEED_COPY:
 367  *    Otherwise we don't allocate a buffer, and *if* we need to
 368  *    flush the write later then a buffer is allocated and
 369  *    we retrieve the data using the dmu.
 370  */
 371 typedef enum {
 372         WR_INDIRECT,    /* indirect - a large write (dmu_sync() data */
 373                         /* and put blkptr in log, rather than actual data) */
 374         WR_COPIED,      /* immediate - data is copied into lr_write_t */
 375         WR_NEED_COPY,   /* immediate - data needs to be copied if pushed */
 376         WR_NUM_STATES   /* number of states */
 377 } itx_wr_state_t;
 378 
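In contrast with the old size-based rule, the routing here is driven by
policy. The sketch below is an assumption-laden illustration:
wbc_active_without_slog() is a made-up predicate for "writeback cache
enabled and no separate slog configured", and the actual routing, including
the watermark limits from NEX-4794, lives in zfs_log_write().

/*
 * Illustrative only; see zfs_log_write() for the real policy.
 */
static itx_wr_state_t
pick_wr_state_wbc(zilog_t *zilog, boolean_t sync)
{
        if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT ||
            wbc_active_without_slog(zilog))     /* hypothetical predicate */
                return (WR_INDIRECT);           /* dmu_sync() into the pool */

        return (sync ? WR_COPIED : WR_NEED_COPY);
}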
 379 typedef struct itx {
 380         list_node_t     itx_node;       /* linkage on zl_itx_list */
 381         void            *itx_private;   /* type-specific opaque data */


 403 extern zilog_t  *zil_alloc(objset_t *os, zil_header_t *zh_phys);
 404 extern void     zil_free(zilog_t *zilog);
 405 
 406 extern zilog_t  *zil_open(objset_t *os, zil_get_data_t *get_data);
 407 extern void     zil_close(zilog_t *zilog);
 408 
 409 extern void     zil_replay(objset_t *os, void *arg,
 410     zil_replay_func_t *replay_func[TX_MAX_TYPE]);
 411 extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
 412 extern void     zil_destroy(zilog_t *zilog, boolean_t keep_first);
 413 extern void     zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
 414 extern void     zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
 415 
 416 extern itx_t    *zil_itx_create(uint64_t txtype, size_t lrsize);
 417 extern void     zil_itx_destroy(itx_t *itx);
 418 extern void     zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
 419 
 420 extern void     zil_commit(zilog_t *zilog, uint64_t oid);
 421 extern void     zil_commit_impl(zilog_t *zilog, uint64_t oid);
 422 
 423 extern int      zil_vdev_offline(const char *osname, void *txarg);
 424 extern int      zil_claim(struct dsl_pool *dp,
 425     struct dsl_dataset *ds, void *txarg);
 426 extern int      zil_check_log_chain(struct dsl_pool *dp,
 427     struct dsl_dataset *ds, void *tx);
 428 extern void     zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 429 extern void     zil_clean(zilog_t *zilog, uint64_t synced_txg);
 430 
 431 extern int      zil_suspend(const char *osname, void **cookiep);
 432 extern void     zil_resume(void *cookie);
 433 
 434 extern void     zil_lwb_add_block(struct lwb *lwb, const blkptr_t *bp);
 435 extern void     zil_lwb_add_txg(struct lwb *lwb, uint64_t txg);
 436 extern int      zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);
 437 
 438 extern void     zil_set_sync(zilog_t *zilog, uint64_t syncval);
 439 
 440 extern void     zil_set_logbias(zilog_t *zilog, uint64_t slogval);
 441 
 442 extern int zil_replay_disable;
 443
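zil_suspend() and zil_resume() bracket operations that must not race with
in-flight log writes. A minimal usage sketch based only on the prototypes
above; with_zil_suspended() and the elided operation are illustrative:

/*
 * Quiesce the ZIL for osname, do work, then resume logging.
 */
static int
with_zil_suspended(const char *osname)
{
        void *cookie;
        int err;

        if ((err = zil_suspend(osname, &cookie)) != 0)
                return (err);

        /* ... operation requiring a quiesced ZIL ... */

        zil_resume(cookie);
        return (0);
}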