Print this page
NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-5367 special vdev: sync-write options (NEW)
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
5269 zpool import slow
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties

*** 18,27 **** --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ /* Portions Copyright 2010 Robert Milkowski */
*** 89,99 **** int zfs_commit_timeout_pct = 5; /* * Disable intent logging replay. This global ZIL switch affects all pools. */ ! int zil_replay_disable = 0; /* * Tunable parameter for debugging or performance analysis. Setting * zfs_nocacheflush will cause corruption on power loss if a volatile * out-of-order write cache is enabled. --- 90,100 ---- int zfs_commit_timeout_pct = 5; /* * Disable intent logging replay. This global ZIL switch affects all pools. */ ! int zil_replay_disable = 0; /* disable intent logging replay */ /* * Tunable parameter for debugging or performance analysis. Setting * zfs_nocacheflush will cause corruption on power loss if a volatile * out-of-order write cache is enabled.
*** 516,526 **** ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); ASSERT3P(lwb->lwb_write_zio, ==, NULL); ASSERT3P(lwb->lwb_root_zio, ==, NULL); ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); ASSERT(lwb->lwb_state == LWB_STATE_CLOSED || ! lwb->lwb_state == LWB_STATE_DONE); /* * Clear the zilog's field to indicate this lwb is no longer * valid, and prevent use-after-free errors. */ --- 517,527 ---- ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); ASSERT3P(lwb->lwb_write_zio, ==, NULL); ASSERT3P(lwb->lwb_root_zio, ==, NULL); ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); ASSERT(lwb->lwb_state == LWB_STATE_CLOSED || ! lwb->lwb_state == LWB_STATE_FLUSH_DONE); /* * Clear the zilog's field to indicate this lwb is no longer * valid, and prevent use-after-free errors. */
*** 874,884 **** mutex_enter(&zcw->zcw_lock); ASSERT(!list_link_active(&zcw->zcw_node)); ASSERT3P(zcw->zcw_lwb, ==, NULL); ASSERT3P(lwb, !=, NULL); ASSERT(lwb->lwb_state == LWB_STATE_OPENED || ! lwb->lwb_state == LWB_STATE_ISSUED); list_insert_tail(&lwb->lwb_waiters, zcw); zcw->zcw_lwb = lwb; mutex_exit(&zcw->zcw_lock); } --- 875,886 ---- mutex_enter(&zcw->zcw_lock); ASSERT(!list_link_active(&zcw->zcw_node)); ASSERT3P(zcw->zcw_lwb, ==, NULL); ASSERT3P(lwb, !=, NULL); ASSERT(lwb->lwb_state == LWB_STATE_OPENED || ! lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_WRITE_DONE); list_insert_tail(&lwb->lwb_waiters, zcw); zcw->zcw_lwb = lwb; mutex_exit(&zcw->zcw_lock); }
*** 920,939 **** } } mutex_exit(&lwb->lwb_vdev_lock); } void zil_lwb_add_txg(lwb_t *lwb, uint64_t txg) { lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); } /* ! * This function is a called after all VDEVs associated with a given lwb * write have completed their DKIOCFLUSHWRITECACHE command; or as soon ! * as the lwb write completes, if "zfs_nocacheflush" is set. * * The intention is for this function to be called as soon as the * contents of an lwb are considered "stable" on disk, and will survive * any sudden loss of power. At this point, any threads waiting for the * lwb to reach this state are signalled, and the "waiter" structures --- 922,981 ---- } } mutex_exit(&lwb->lwb_vdev_lock); } + static void + zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb) + { + avl_tree_t *src = &lwb->lwb_vdev_tree; + avl_tree_t *dst = &nlwb->lwb_vdev_tree; + void *cookie = NULL; + zil_vdev_node_t *zv; + + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); + ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_WRITE_DONE); + ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); + + /* + * While 'lwb' is at a point in its lifetime where lwb_vdev_tree does + * not need the protection of lwb_vdev_lock (it will only be modified + * while holding zilog->zl_lock) as its writes and those of its + * children have all completed. The younger 'nlwb' may be waiting on + * future writes to additional vdevs. + */ + mutex_enter(&nlwb->lwb_vdev_lock); + /* + * Tear down the 'lwb' vdev tree, ensuring that entries which do not + * exist in 'nlwb' are moved to it, freeing any would-be duplicates. + */ + while ((zv = avl_destroy_nodes(src, &cookie)) != NULL) { + avl_index_t where; + + if (avl_find(dst, zv, &where) == NULL) { + avl_insert(dst, zv, where); + } else { + kmem_free(zv, sizeof (*zv)); + } + } + mutex_exit(&nlwb->lwb_vdev_lock); + } + void zil_lwb_add_txg(lwb_t *lwb, uint64_t txg) { lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); } /* ! * This function is a called after all vdevs associated with a given lwb * write have completed their DKIOCFLUSHWRITECACHE command; or as soon ! * as the lwb write completes, if "zil_nocacheflush" is set. Further, ! * all "previous" lwb's will have completed before this function is ! * called; i.e. this function is called for all previous lwbs before ! * it's called for "this" lwb (enforced via zio the dependencies ! * configured in zil_lwb_set_zio_dependency()). * * The intention is for this function to be called as soon as the * contents of an lwb are considered "stable" on disk, and will survive * any sudden loss of power. At this point, any threads waiting for the * lwb to reach this state are signalled, and the "waiter" structures
*** 966,977 **** ASSERT3U(lwb->lwb_issued_timestamp, >, 0); zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp; lwb->lwb_root_zio = NULL; - lwb->lwb_state = LWB_STATE_DONE; if (zilog->zl_last_lwb_opened == lwb) { /* * Remember the highest committed log sequence number * for ztest. We only update this value when all the log * writes succeeded, because ztest wants to ASSERT that --- 1008,1021 ---- ASSERT3U(lwb->lwb_issued_timestamp, >, 0); zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp; lwb->lwb_root_zio = NULL; + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); + lwb->lwb_state = LWB_STATE_FLUSH_DONE; + if (zilog->zl_last_lwb_opened == lwb) { /* * Remember the highest committed log sequence number * for ztest. We only update this value when all the log * writes succeeded, because ztest wants to ASSERT that
*** 1007,1024 **** */ dmu_tx_commit(tx); } /* ! * This is called when an lwb write completes. This means, this specific ! * lwb was written to disk, and all dependent lwb have also been ! * written to disk. ! * ! * At this point, a DKIOCFLUSHWRITECACHE command hasn't been issued to ! * the VDEVs involved in writing out this specific lwb. The lwb will be ! * "done" once zil_lwb_flush_vdevs_done() is called, which occurs in the ! * zio completion callback for the lwb's root zio. */ static void zil_lwb_write_done(zio_t *zio) { lwb_t *lwb = zio->io_private; --- 1051,1071 ---- */ dmu_tx_commit(tx); } /* ! * This is called when an lwb's write zio completes. The callback's ! * purpose is to issue the DKIOCFLUSHWRITECACHE commands for the vdevs ! * in the lwb's lwb_vdev_tree. The tree will contain the vdevs involved ! * in writing out this specific lwb's data, and in the case that cache ! * flushes have been deferred, vdevs involved in writing the data for ! * previous lwbs. The writes corresponding to all the vdevs in the ! * lwb_vdev_tree will have completed by the time this is called, due to ! * the zio dependencies configured in zil_lwb_set_zio_dependency(), ! * which takes deferred flushes into account. The lwb will be "done" ! * once zil_lwb_flush_vdevs_done() is called, which occurs in the zio ! * completion callback for the lwb's root zio. */ static void zil_lwb_write_done(zio_t *zio) { lwb_t *lwb = zio->io_private;
*** 1025,1034 **** --- 1072,1082 ---- spa_t *spa = zio->io_spa; zilog_t *zilog = lwb->lwb_zilog; avl_tree_t *t = &lwb->lwb_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; + lwb_t *nlwb; ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0); ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG);
*** 1038,1051 **** ASSERT(!BP_IS_HOLE(zio->io_bp)); ASSERT(BP_GET_FILL(zio->io_bp) == 0); abd_put(zio->io_abd); - ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED); - mutex_enter(&zilog->zl_lock); lwb->lwb_write_zio = NULL; mutex_exit(&zilog->zl_lock); if (avl_numnodes(t) == 0) return; --- 1086,1100 ---- ASSERT(!BP_IS_HOLE(zio->io_bp)); ASSERT(BP_GET_FILL(zio->io_bp) == 0); abd_put(zio->io_abd); mutex_enter(&zilog->zl_lock); + ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED); + lwb->lwb_state = LWB_STATE_WRITE_DONE; lwb->lwb_write_zio = NULL; + nlwb = list_next(&zilog->zl_lwb_list, lwb); mutex_exit(&zilog->zl_lock); if (avl_numnodes(t) == 0) return;
*** 1060,1077 **** --- 1109,1214 ---- while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(zv, sizeof (*zv)); return; } + /* + * If this lwb does not have any threads waiting for it to + * complete, we want to defer issuing the DKIOCFLUSHWRITECACHE + * command to the vdevs written to by "this" lwb, and instead + * rely on the "next" lwb to handle the DKIOCFLUSHWRITECACHE + * command for those vdevs. Thus, we merge the vdev tree of + * "this" lwb with the vdev tree of the "next" lwb in the list, + * and assume the "next" lwb will handle flushing the vdevs (or + * deferring the flush(s) again). + * + * This is a useful performance optimization, especially for + * workloads with lots of async write activity and few sync + * write and/or fsync activity, as it has the potential to + * coalesce multiple flush commands to a vdev into one. + */ + if (list_head(&lwb->lwb_waiters) == NULL && nlwb != NULL) { + zil_lwb_flush_defer(lwb, nlwb); + ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); + return; + } + while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); if (vd != NULL) zio_flush(lwb->lwb_root_zio, vd); kmem_free(zv, sizeof (*zv)); } } + static void + zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb) + { + lwb_t *last_lwb_opened = zilog->zl_last_lwb_opened; + + ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); + ASSERT(MUTEX_HELD(&zilog->zl_lock)); + + /* + * The zilog's "zl_last_lwb_opened" field is used to build the + * lwb/zio dependency chain, which is used to preserve the + * ordering of lwb completions that is required by the semantics + * of the ZIL. Each new lwb zio becomes a parent of the + * "previous" lwb zio, such that the new lwb's zio cannot + * complete until the "previous" lwb's zio completes. + * + * This is required by the semantics of zil_commit(); the commit + * waiters attached to the lwbs will be woken in the lwb zio's + * completion callback, so this zio dependency graph ensures the + * waiters are woken in the correct order (the same order the + * lwbs were created). + */ + if (last_lwb_opened != NULL && + last_lwb_opened->lwb_state != LWB_STATE_FLUSH_DONE) { + ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED || + last_lwb_opened->lwb_state == LWB_STATE_ISSUED || + last_lwb_opened->lwb_state == LWB_STATE_WRITE_DONE); + + ASSERT3P(last_lwb_opened->lwb_root_zio, !=, NULL); + zio_add_child(lwb->lwb_root_zio, + last_lwb_opened->lwb_root_zio); + + /* + * If the previous lwb's write hasn't already completed, + * we also want to order the completion of the lwb write + * zios (above, we only order the completion of the lwb + * root zios). This is required because of how we can + * defer the DKIOCFLUSHWRITECACHE commands for each lwb. + * + * When the DKIOCFLUSHWRITECACHE commands are defered, + * the previous lwb will rely on this lwb to flush the + * vdevs written to by that previous lwb. Thus, we need + * to ensure this lwb doesn't issue the flush until + * after the previous lwb's write completes. We ensure + * this ordering by setting the zio parent/child + * relationship here. + * + * Without this relationship on the lwb's write zio, + * it's possible for this lwb's write to complete prior + * to the previous lwb's write completing; and thus, the + * vdevs for the previous lwb would be flushed prior to + * that lwb's data being written to those vdevs (the + * vdevs are flushed in the lwb write zio's completion + * handler, zil_lwb_write_done()). + */ + if (last_lwb_opened->lwb_state != LWB_STATE_WRITE_DONE) { + ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED || + last_lwb_opened->lwb_state == LWB_STATE_ISSUED); + + ASSERT3P(last_lwb_opened->lwb_write_zio, !=, NULL); + zio_add_child(lwb->lwb_write_zio, + last_lwb_opened->lwb_write_zio); + } + } + } + + /* * This function's purpose is to "open" an lwb such that it is ready to * accept new itxs being committed to it. To do this, the lwb's zio * structures are created, and linked to the lwb. This function is * idempotent; if the passed in lwb has already been opened, this
*** 1112,1148 **** ASSERT3P(lwb->lwb_write_zio, !=, NULL); lwb->lwb_state = LWB_STATE_OPENED; mutex_enter(&zilog->zl_lock); ! ! /* ! * The zilog's "zl_last_lwb_opened" field is used to ! * build the lwb/zio dependency chain, which is used to ! * preserve the ordering of lwb completions that is ! * required by the semantics of the ZIL. Each new lwb ! * zio becomes a parent of the "previous" lwb zio, such ! * that the new lwb's zio cannot complete until the ! * "previous" lwb's zio completes. ! * ! * This is required by the semantics of zil_commit(); ! * the commit waiters attached to the lwbs will be woken ! * in the lwb zio's completion callback, so this zio ! * dependency graph ensures the waiters are woken in the ! * correct order (the same order the lwbs were created). ! */ ! lwb_t *last_lwb_opened = zilog->zl_last_lwb_opened; ! if (last_lwb_opened != NULL && ! last_lwb_opened->lwb_state != LWB_STATE_DONE) { ! ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED || ! last_lwb_opened->lwb_state == LWB_STATE_ISSUED); ! ASSERT3P(last_lwb_opened->lwb_root_zio, !=, NULL); ! zio_add_child(lwb->lwb_root_zio, ! last_lwb_opened->lwb_root_zio); ! } zilog->zl_last_lwb_opened = lwb; - mutex_exit(&zilog->zl_lock); } ASSERT3P(lwb->lwb_root_zio, !=, NULL); ASSERT3P(lwb->lwb_write_zio, !=, NULL); --- 1249,1260 ---- ASSERT3P(lwb->lwb_write_zio, !=, NULL); lwb->lwb_state = LWB_STATE_OPENED; mutex_enter(&zilog->zl_lock); ! zil_lwb_set_zio_dependency(zilog, lwb); zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); } ASSERT3P(lwb->lwb_root_zio, !=, NULL); ASSERT3P(lwb->lwb_write_zio, !=, NULL);
*** 1206,1222 **** */ tx = dmu_tx_create(zilog->zl_os); /* ! * Since we are not going to create any new dirty data, and we ! * can even help with clearing the existing dirty data, we ! * should not be subject to the dirty data based delays. We ! * use TXG_NOTHROTTLE to bypass the delay mechanism. */ ! VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE)); ! dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); lwb->lwb_tx = tx; --- 1318,1343 ---- */ tx = dmu_tx_create(zilog->zl_os); /* ! * Since we are not going to create any new dirty data and we can even ! * help with clearing the existing dirty data, we should not be subject ! * to the dirty data based delays. ! * We (ab)use TXG_WAITED to bypass the delay mechanism. ! * One side effect from using TXG_WAITED is that dmu_tx_assign() can ! * fail if the pool is suspended. Those are dramatic circumstances, ! * so we return NULL to signal that the normal ZIL processing is not ! * possible and txg_wait_synced() should be used to ensure that the data ! * is on disk. */ ! error = dmu_tx_assign(tx, TXG_WAITED); ! if (error != 0) { ! ASSERT3S(error, ==, EIO); ! dmu_tx_abort(tx); ! return (NULL); ! } dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); lwb->lwb_tx = tx;
*** 1838,1848 **** break; mutex_enter(&zilog->zl_lock); lwb_t *last_lwb = zilog->zl_last_lwb_opened; ! if (last_lwb == NULL || last_lwb->lwb_state == LWB_STATE_DONE) { /* * All of the itxs this waiter was waiting on * must have already completed (or there were * never any itx's for it to wait on), so it's * safe to skip this waiter and mark it done. --- 1959,1970 ---- break; mutex_enter(&zilog->zl_lock); lwb_t *last_lwb = zilog->zl_last_lwb_opened; ! if (last_lwb == NULL || ! last_lwb->lwb_state == LWB_STATE_FLUSH_DONE) { /* * All of the itxs this waiter was waiting on * must have already completed (or there were * never any itx's for it to wait on), so it's * safe to skip this waiter and mark it done.
*** 1919,1929 **** lwb = list_tail(&zilog->zl_lwb_list); if (lwb == NULL) { lwb = zil_create(zilog); } else { ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_DONE); } while (itx = list_head(&zilog->zl_itx_commit_list)) { lr_t *lrc = &itx->itx_lr; uint64_t txg = lrc->lrc_txg; --- 2041,2052 ---- lwb = list_tail(&zilog->zl_lwb_list); if (lwb == NULL) { lwb = zil_create(zilog); } else { ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); } while (itx = list_head(&zilog->zl_itx_commit_list)) { lr_t *lrc = &itx->itx_lr; uint64_t txg = lrc->lrc_txg;
*** 2021,2031 **** } } else { ASSERT(list_is_empty(&nolwb_waiters)); ASSERT3P(lwb, !=, NULL); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_DONE); /* * At this point, the ZIL block pointed at by the "lwb" * variable is in one of the following states: "closed" * or "open". --- 2144,2155 ---- } } else { ASSERT(list_is_empty(&nolwb_waiters)); ASSERT3P(lwb, !=, NULL); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); ! ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); /* * At this point, the ZIL block pointed at by the "lwb" * variable is in one of the following states: "closed" * or "open".
*** 2142,2152 **** * point of this function is to issue the lwb). Additionally, we * do this prior to acquiring the zl_issuer_lock, to avoid * acquiring it when it's not necessary to do so. */ if (lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_DONE) return; /* * In order to call zil_lwb_write_issue() we must hold the * zilog's "zl_issuer_lock". We can't simply acquire that lock, --- 2266,2277 ---- * point of this function is to issue the lwb). Additionally, we * do this prior to acquiring the zl_issuer_lock, to avoid * acquiring it when it's not necessary to do so. */ if (lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_WRITE_DONE || ! lwb->lwb_state == LWB_STATE_FLUSH_DONE) return; /* * In order to call zil_lwb_write_issue() we must hold the * zilog's "zl_issuer_lock". We can't simply acquire that lock,
*** 2190,2200 **** * * See the comment above the lwb_state_t structure definition for * more details on the lwb states, and locking requirements. */ if (lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_DONE) goto out; ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); /* --- 2315,2326 ---- * * See the comment above the lwb_state_t structure definition for * more details on the lwb states, and locking requirements. */ if (lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_WRITE_DONE || ! lwb->lwb_state == LWB_STATE_FLUSH_DONE) goto out; ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); /*
*** 2363,2373 **** * is required. */ IMPLY(lwb != NULL, lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_DONE); cv_wait(&zcw->zcw_cv, &zcw->zcw_lock); } } mutex_exit(&zcw->zcw_lock); --- 2489,2500 ---- * is required. */ IMPLY(lwb != NULL, lwb->lwb_state == LWB_STATE_ISSUED || ! lwb->lwb_state == LWB_STATE_WRITE_DONE || ! lwb->lwb_state == LWB_STATE_FLUSH_DONE); cv_wait(&zcw->zcw_cv, &zcw->zcw_lock); } } mutex_exit(&zcw->zcw_lock);
*** 3005,3021 **** * We need to use zil_commit_impl to ensure we wait for all * LWB_STATE_OPENED and LWB_STATE_ISSUED lwb's to be committed * to disk before proceeding. If we used zil_commit instead, it * would just call txg_wait_synced(), because zl_suspend is set. * txg_wait_synced() doesn't wait for these lwb's to be ! * LWB_STATE_DONE before returning. */ zil_commit_impl(zilog, 0); /* ! * Now that we've ensured all lwb's are LWB_STATE_DONE, we use ! * txg_wait_synced() to ensure the data from the zilog has * migrated to the main pool before calling zil_destroy(). */ txg_wait_synced(zilog->zl_dmu_pool, 0); zil_destroy(zilog, B_FALSE); --- 3132,3148 ---- * We need to use zil_commit_impl to ensure we wait for all * LWB_STATE_OPENED and LWB_STATE_ISSUED lwb's to be committed * to disk before proceeding. If we used zil_commit instead, it * would just call txg_wait_synced(), because zl_suspend is set. * txg_wait_synced() doesn't wait for these lwb's to be ! * LWB_STATE_FLUSH_DONE before returning. */ zil_commit_impl(zilog, 0); /* ! * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we ! * use txg_wait_synced() to ensure the data from the zilog has * migrated to the main pool before calling zil_destroy(). */ txg_wait_synced(zilog->zl_dmu_pool, 0); zil_destroy(zilog, B_FALSE);
*** 3215,3225 **** return (B_FALSE); } /* ARGSUSED */ int ! zil_reset(const char *osname, void *arg) { int error; error = zil_suspend(osname, NULL); if (error != 0) --- 3342,3352 ---- return (B_FALSE); } /* ARGSUSED */ int ! zil_vdev_offline(const char *osname, void *arg) { int error; error = zil_suspend(osname, NULL); if (error != 0)