Print this page
NEX-20218 Backport Illumos #9464 txg_kick() fails to see that we are quiescing, forcing transactions to their next stages without leaving them accumulate changes
MFV illumos-gate@fa41d87de9ec9000964c605eb01d6dc19e4a1abe
    9464 txg_kick() fails to see that we are quiescing, forcing transactions to their next stages without leaving them accumulate changes
    Reviewed by: Matt Ahrens <matt@delphix.com>
    Reviewed by: Brad Lewis <brad.lewis@delphix.com>
    Reviewed by: Andriy Gapon <avg@FreeBSD.org>
    Approved by: Dan McDonald <danmcd@joyent.com>
NEX-6859 TX-commit callback that is registered in sync-ctx causes system panic
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-5795 Rename 'wrc' as 'wbc' in the source and in the tech docs
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
NEX-4582 update wrc test cases for allow to use write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-1823 Slow performance doing deletion of a large dataset
5911 ZFS "hangs" while deleting file
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
Moved closed ZFS files to open repo, changed Makefiles accordingly
Removed unneeded weak symbols
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties

*** 18,28 **** * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include <sys/dmu.h> --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include <sys/dmu.h>
*** 299,325 **** dmu_tx_count_dnode(txh); } } void - dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object) - { - dmu_tx_hold_t *txh; - - ASSERT(tx->tx_txg == 0); - txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, - object, THT_WRITE, 0, 0); - if (txh == NULL) - return; - - dnode_t *dn = txh->txh_dnode; - (void) refcount_add_many(&txh->txh_space_towrite, - 1ULL << dn->dn_indblkshift, FTAG); - dmu_tx_count_dnode(txh); - } - - void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) { dmu_tx_hold_t *txh; ASSERT0(tx->tx_txg); --- 299,308 ----
*** 867,877 **** * usage by one transaction. Also, as we approach the allowed usage, * we will allow a very limited amount of changes into each TXG, thus * decreasing performance. */ static int ! dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) { spa_t *spa = tx->tx_pool->dp_spa; ASSERT0(tx->tx_txg); --- 850,860 ---- * usage by one transaction. Also, as we approach the allowed usage, * we will allow a very limited amount of changes into each TXG, thus * decreasing performance. */ static int ! dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) { spa_t *spa = tx->tx_pool->dp_spa; ASSERT0(tx->tx_txg);
*** 887,903 **** * * Note that we always honor the txg_how flag regardless * of the failuremode setting. */ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && ! !(txg_how & TXG_WAIT)) return (SET_ERROR(EIO)); return (SET_ERROR(ERESTART)); } ! if (!tx->tx_dirty_delayed && dsl_pool_need_dirty_delay(tx->tx_pool)) { tx->tx_wait_dirty = B_TRUE; return (SET_ERROR(ERESTART)); } --- 870,886 ---- * * Note that we always honor the txg_how flag regardless * of the failuremode setting. */ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && ! txg_how != TXG_WAIT) return (SET_ERROR(EIO)); return (SET_ERROR(ERESTART)); } ! if (!tx->tx_waited && dsl_pool_need_dirty_delay(tx->tx_pool)) { tx->tx_wait_dirty = B_TRUE; return (SET_ERROR(ERESTART)); }
*** 981,1028 **** tx->tx_lasttried_txg = tx->tx_txg; tx->tx_txg = 0; } /* ! * Assign tx to a transaction group; txg_how is a bitmask: * ! * If TXG_WAIT is set and the currently open txg is full, this function ! * will wait until there's a new txg. This should be used when no locks ! * are being held. With this bit set, this function will only fail if ! * we're truly out of space (or over quota). * ! * If TXG_WAIT is *not* set and we can't assign into the currently open ! * txg without blocking, this function will return immediately with ! * ERESTART. This should be used whenever locks are being held. On an ! * ERESTART error, the caller should drop all locks, call dmu_tx_wait(), ! * and try again. * ! * If TXG_NOTHROTTLE is set, this indicates that this tx should not be ! * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for ! * details on the throttle). This is used by the VFS operations, after ! * they have already called dmu_tx_wait() (though most likely on a ! * different tx). */ int ! dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) { int err; ASSERT(tx->tx_txg == 0); ! ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE)); ASSERT(!dsl_pool_sync_context(tx->tx_pool)); /* If we might wait, we must not hold the config lock. */ ! IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool)); ! if ((txg_how & TXG_NOTHROTTLE)) ! tx->tx_dirty_delayed = B_TRUE; while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { dmu_tx_unassign(tx); ! if (err != ERESTART || !(txg_how & TXG_WAIT)) return (err); dmu_tx_wait(tx); } --- 964,1008 ---- tx->tx_lasttried_txg = tx->tx_txg; tx->tx_txg = 0; } /* ! * Assign tx to a transaction group. txg_how can be one of: * ! * (1) TXG_WAIT. If the current open txg is full, waits until there's ! * a new one. This should be used when you're not holding locks. ! * It will only fail if we're truly out of space (or over quota). * ! * (2) TXG_NOWAIT. If we can't assign into the current open txg without ! * blocking, returns immediately with ERESTART. This should be used ! * whenever you're holding locks. On an ERESTART error, the caller ! * should drop locks, do a dmu_tx_wait(tx), and try again. * ! * (3) TXG_WAITED. Like TXG_NOWAIT, but indicates that dmu_tx_wait() ! * has already been called on behalf of this operation (though ! * most likely on a different tx). */ int ! dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how) { int err; ASSERT(tx->tx_txg == 0); ! ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT || ! txg_how == TXG_WAITED); ASSERT(!dsl_pool_sync_context(tx->tx_pool)); /* If we might wait, we must not hold the config lock. */ ! ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool)); ! if (txg_how == TXG_WAITED) ! tx->tx_waited = B_TRUE; while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { dmu_tx_unassign(tx); ! if (err != ERESTART || txg_how != TXG_WAIT) return (err); dmu_tx_wait(tx); }
*** 1055,1070 **** dmu_tx_delay(tx, dirty); tx->tx_wait_dirty = B_FALSE; /* ! * Note: setting tx_dirty_delayed only has effect if the ! * caller used TX_WAIT. Otherwise they are going to ! * destroy this tx and try again. The common case, ! * zfs_write(), uses TX_WAIT. */ ! tx->tx_dirty_delayed = B_TRUE; } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { /* * If the pool is suspended we need to wait until it * is resumed. Note that it's possible that the pool * has become active after this thread has tried to --- 1035,1050 ---- dmu_tx_delay(tx, dirty); tx->tx_wait_dirty = B_FALSE; /* ! * Note: setting tx_waited only has effect if the caller ! * used TX_WAIT. Otherwise they are going to destroy ! * this tx and try again. The common case, zfs_write(), ! * uses TX_WAIT. */ ! tx->tx_waited = B_TRUE; } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { /* * If the pool is suspended we need to wait until it * is resumed. Note that it's possible that the pool * has become active after this thread has tried to
*** 1083,1093 **** while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1) cv_wait(&dn->dn_notxholds, &dn->dn_mtx); mutex_exit(&dn->dn_mtx); tx->tx_needassign_txh = NULL; } else { ! txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1); } } static void dmu_tx_destroy(dmu_tx_t *tx) --- 1063,1078 ---- while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1) cv_wait(&dn->dn_notxholds, &dn->dn_mtx); mutex_exit(&dn->dn_mtx); tx->tx_needassign_txh = NULL; } else { ! /* ! * If we have a lot of dirty data just wait until we sync ! * out a TXG at which point we'll hopefully have synced ! * a portion of the changes. ! */ ! txg_wait_synced(dp, spa_last_synced_txg(spa) + 1); } } static void dmu_tx_destroy(dmu_tx_t *tx)
*** 1139,1150 **** } if (tx->tx_tempreserve_cookie) dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); ! if (!list_is_empty(&tx->tx_callbacks)) ! txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks); if (tx->tx_anyobj == FALSE) txg_rele_to_sync(&tx->tx_txgh); dmu_tx_destroy(tx); --- 1124,1142 ---- } if (tx->tx_tempreserve_cookie) dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); ! if (!list_is_empty(&tx->tx_callbacks)) { ! if (dmu_tx_is_syncing(tx)) { ! txg_register_callbacks_sync(tx->tx_pool, ! tx->tx_txg, &tx->tx_callbacks); ! } else { ! txg_register_callbacks(&tx->tx_txgh, ! &tx->tx_callbacks); ! } ! } if (tx->tx_anyobj == FALSE) txg_rele_to_sync(&tx->tx_txgh); dmu_tx_destroy(tx);