big-one Udiff usr/src/uts/common/fs/zfs/dmu

Print this page

NEX-20218 Backport Illumos #9464 txg_kick() fails to see that we are quiescing, forcing transactions to their next stages without leaving them accumulate changes
MFV illumos-gate@fa41d87de9ec9000964c605eb01d6dc19e4a1abe
    9464 txg_kick() fails to see that we are quiescing, forcing transactions to their next stages without leaving them accumulate changes
    Reviewed by: Matt Ahrens <matt@delphix.com>
    Reviewed by: Brad Lewis <brad.lewis@delphix.com>
    Reviewed by: Andriy Gapon <avg@FreeBSD.org>
    Approved by: Dan McDonald <danmcd@joyent.com>
NEX-6859 TX-commit callback that is registered in sync-ctx causes system panic
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-5795 Rename 'wrc' as 'wbc' in the source and in the tech docs
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
NEX-4582 update wrc test cases to allow using write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-1823 Slow performance doing of a large dataset
5911 ZFS "hangs" while deleting file
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
Moved closed ZFS files to open repo, changed Makefiles accordingly
Removed unneeded weak symbols
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties

@@ -18,11 +18,11 @@
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
 
 #include <sys/dmu.h>

@@ -299,27 +299,10 @@
                 dmu_tx_count_dnode(txh);
         }
 }
 
 void
-dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object)
-{
-        dmu_tx_hold_t *txh;
-
-        ASSERT(tx->tx_txg == 0);
-        txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
-            object, THT_WRITE, 0, 0);
-        if (txh == NULL)
-                return;
-
-        dnode_t *dn = txh->txh_dnode;
-        (void) refcount_add_many(&txh->txh_space_towrite,
-            1ULL << dn->dn_indblkshift, FTAG);
-        dmu_tx_count_dnode(txh);
-}
-
-void
 dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
 {
         dmu_tx_hold_t *txh;
 
         ASSERT0(tx->tx_txg);

@@ -867,11 +850,11 @@
  * usage by one transaction.  Also, as we approach the allowed usage,
  * we will allow a very limited amount of changes into each TXG, thus
  * decreasing performance.
  */
 static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
 {
         spa_t *spa = tx->tx_pool->dp_spa;
 
         ASSERT0(tx->tx_txg);

@@ -887,17 +870,17 @@
                  *
                  * Note that we always honor the txg_how flag regardless
                  * of the failuremode setting.
                  */
                 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
-                    !(txg_how & TXG_WAIT))
+                    txg_how != TXG_WAIT)
                         return (SET_ERROR(EIO));
 
                 return (SET_ERROR(ERESTART));
         }
 
-        if (!tx->tx_dirty_delayed &&
+        if (!tx->tx_waited &&
             dsl_pool_need_dirty_delay(tx->tx_pool)) {
                 tx->tx_wait_dirty = B_TRUE;
                 return (SET_ERROR(ERESTART));
         }

@@ -981,48 +964,45 @@
         tx->tx_lasttried_txg = tx->tx_txg;
         tx->tx_txg = 0;
 }
 
 /*
- * Assign tx to a transaction group; txg_how is a bitmask:
+ * Assign tx to a transaction group.  txg_how can be one of:
  *
- * If TXG_WAIT is set and the currently open txg is full, this function
- * will wait until there's a new txg. This should be used when no locks
- * are being held. With this bit set, this function will only fail if
- * we're truly out of space (or over quota).
+ * (1)  TXG_WAIT.  If the current open txg is full, waits until there's
+ *      a new one.  This should be used when you're not holding locks.
+ *      It will only fail if we're truly out of space (or over quota).
  *
- * If TXG_WAIT is *not* set and we can't assign into the currently open
- * txg without blocking, this function will return immediately with
- * ERESTART. This should be used whenever locks are being held.  On an
- * ERESTART error, the caller should drop all locks, call dmu_tx_wait(),
- * and try again.
+ * (2)  TXG_NOWAIT.  If we can't assign into the current open txg without
+ *      blocking, returns immediately with ERESTART.  This should be used
+ *      whenever you're holding locks.  On an ERESTART error, the caller
+ *      should drop locks, do a dmu_tx_wait(tx), and try again.
  *
- * If TXG_NOTHROTTLE is set, this indicates that this tx should not be
- * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for
- * details on the throttle). This is used by the VFS operations, after
- * they have already called dmu_tx_wait() (though most likely on a
- * different tx).
+ * (3)  TXG_WAITED.  Like TXG_NOWAIT, but indicates that dmu_tx_wait()
+ *      has already been called on behalf of this operation (though
+ *      most likely on a different tx).
  */
 int
-dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
 {
         int err;
 
         ASSERT(tx->tx_txg == 0);
-        ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE));
+        ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT ||
+            txg_how == TXG_WAITED);
         ASSERT(!dsl_pool_sync_context(tx->tx_pool));
 
         /* If we might wait, we must not hold the config lock. */
-        IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool));
+        ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
 
-        if ((txg_how & TXG_NOTHROTTLE))
-                tx->tx_dirty_delayed = B_TRUE;
+        if (txg_how == TXG_WAITED)
+                tx->tx_waited = B_TRUE;
 
         while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
                 dmu_tx_unassign(tx);
 
-                if (err != ERESTART || !(txg_how & TXG_WAIT))
+                if (err != ERESTART || txg_how != TXG_WAIT)
                         return (err);
 
                 dmu_tx_wait(tx);
         }

@@ -1055,16 +1035,16 @@
                 dmu_tx_delay(tx, dirty);
 
                 tx->tx_wait_dirty = B_FALSE;
 
                 /*
-                 * Note: setting tx_dirty_delayed only has effect if the
-                 * caller used TX_WAIT.  Otherwise they are going to
-                 * destroy this tx and try again.  The common case,
-                 * zfs_write(), uses TX_WAIT.
+                 * Note: setting tx_waited only has effect if the caller
+                 * used TXG_WAIT.  Otherwise they are going to destroy
+                 * this tx and try again.  The common case, zfs_write(),
+                 * uses TXG_WAIT.
                  */
-                tx->tx_dirty_delayed = B_TRUE;
+                tx->tx_waited = B_TRUE;
         } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
                 /*
                  * If the pool is suspended we need to wait until it
                  * is resumed.  Note that it's possible that the pool
                  * has become active after this thread has tried to

@@ -1083,11 +1063,16 @@
                 while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
                         cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
                 mutex_exit(&dn->dn_mtx);
                 tx->tx_needassign_txh = NULL;
         } else {
-                txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
+                /*
+                 * If we have a lot of dirty data just wait until we sync
+                 * out a TXG at which point we'll hopefully have synced
+                 * a portion of the changes.
+                 */
+                txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
         }
 }
 
 static void
 dmu_tx_destroy(dmu_tx_t *tx)

@@ -1139,12 +1124,19 @@
         }
 
         if (tx->tx_tempreserve_cookie)
                 dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
 
-        if (!list_is_empty(&tx->tx_callbacks))
-                txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);
+        if (!list_is_empty(&tx->tx_callbacks)) {
+                if (dmu_tx_is_syncing(tx)) {
+                        txg_register_callbacks_sync(tx->tx_pool,
+                            tx->tx_txg, &tx->tx_callbacks);
+                } else {
+                        txg_register_callbacks(&tx->tx_txgh,
+                            &tx->tx_callbacks);
+                }
+        }
 
         if (tx->tx_anyobj == FALSE)
                 txg_rele_to_sync(&tx->tx_txgh);
 
         dmu_tx_destroy(tx);