3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 */
27
28 #include <sys/dmu.h>
29 #include <sys/dmu_impl.h>
30 #include <sys/dbuf.h>
31 #include <sys/dmu_tx.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dsl_dataset.h>
34 #include <sys/dsl_dir.h>
35 #include <sys/dsl_pool.h>
36 #include <sys/zap_impl.h>
37 #include <sys/spa.h>
38 #include <sys/sa.h>
39 #include <sys/sa_impl.h>
40 #include <sys/zfs_context.h>
41 #include <sys/varargs.h>
42
43 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
284 }
285
286 void
287 dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
288 {
289 dmu_tx_hold_t *txh;
290
291 ASSERT0(tx->tx_txg);
292 ASSERT3U(len, <=, DMU_MAX_ACCESS);
293 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
294
295 txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
296 object, THT_WRITE, off, len);
297 if (txh != NULL) {
298 dmu_tx_count_write(txh, off, len);
299 dmu_tx_count_dnode(txh);
300 }
301 }
302
303 void
304 dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object)
305 {
306 dmu_tx_hold_t *txh;
307
308 ASSERT(tx->tx_txg == 0);
309 txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
310 object, THT_WRITE, 0, 0);
311 if (txh == NULL)
312 return;
313
314 dnode_t *dn = txh->txh_dnode;
315 (void) refcount_add_many(&txh->txh_space_towrite,
316 1ULL << dn->dn_indblkshift, FTAG);
317 dmu_tx_count_dnode(txh);
318 }
319
320 void
321 dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
322 {
323 dmu_tx_hold_t *txh;
324
325 ASSERT0(tx->tx_txg);
326 ASSERT3U(len, <=, DMU_MAX_ACCESS);
327 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
328
329 txh = dmu_tx_hold_dnode_impl(tx, dn, THT_WRITE, off, len);
330 if (txh != NULL) {
331 dmu_tx_count_write(txh, off, len);
332 dmu_tx_count_dnode(txh);
333 }
334 }
335
336 /*
337 * This function marks the transaction as being a "net free". The end
338 * result is that refquotas will be disabled for this transaction, and
339 * this transaction will be able to use half of the pool space overhead
340 * (see dsl_pool_adjustedsize()). Therefore this function should only
852 *
853 * - dd_tempreserved[], which is the sum of in-flight transactions'
854 * holds' txh_space_towrite (i.e. those transactions that have called
855 * dmu_tx_assign() but not yet called dmu_tx_commit()).
856 *
857 * - dd_space_towrite[], which is the amount of space consumed by dirtied dbufs.
858 *
859 * Note that all of these values are inflated by spa_get_worst_case_asize(),
860 * which means that we may get ERESTART well before we are actually in danger
861 * of running out of space, but this also mitigates any small inaccuracies
862 * in the rough estimate (e.g. txh_space_towrite doesn't take into account
863 * indirect blocks, and dd_space_towrite[] doesn't take into account changes
864 * to the MOS).
865 *
866 * Note that due to this algorithm, it is possible to exceed the allowed
867 * usage by one transaction. Also, as we approach the allowed usage,
868 * we will allow a very limited amount of changes into each TXG, thus
869 * decreasing performance.
870 */
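/*
 * Illustrative arithmetic (hypothetical numbers, not taken from this code):
 * if a transaction's holds sum to towrite = 1 MB and the worst-case
 * inflation factor is, say, 24x, the space check effectively asks whether
 * roughly 24 MB can still be written to this dsl_dir.  This is why
 * ERESTART can be returned well before the pool is actually full, as
 * noted above.
 */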
871 static int
872 dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
873 {
874 spa_t *spa = tx->tx_pool->dp_spa;
875
876 ASSERT0(tx->tx_txg);
877
878 if (tx->tx_err)
879 return (tx->tx_err);
880
881 if (spa_suspended(spa)) {
882 /*
883 * If the user has indicated a blocking failure mode,
884 * then return ERESTART, which will block in dmu_tx_wait().
885 * Otherwise, return EIO so that an error can get
886 * propagated back to the VOP calls.
887 *
888 * Note that we always honor the txg_how flag regardless
889 * of the failuremode setting.
890 */
891 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
892 !(txg_how & TXG_WAIT))
893 return (SET_ERROR(EIO));
894
895 return (SET_ERROR(ERESTART));
896 }
897
898 if (!tx->tx_dirty_delayed &&
899 dsl_pool_need_dirty_delay(tx->tx_pool)) {
900 tx->tx_wait_dirty = B_TRUE;
901 return (SET_ERROR(ERESTART));
902 }
903
904 tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
905 tx->tx_needassign_txh = NULL;
906
907 /*
908 * NB: No error returns are allowed after txg_hold_open, but
909 * before processing the dnode holds, due to the
910 * dmu_tx_unassign() logic.
911 */
912
913 uint64_t towrite = 0;
914 uint64_t tohold = 0;
915 for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
916 txh = list_next(&tx->tx_holds, txh)) {
917 dnode_t *dn = txh->txh_dnode;
918 if (dn != NULL) {
966
967 if (dn == NULL)
968 continue;
969 mutex_enter(&dn->dn_mtx);
970 ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
971
972 if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
973 dn->dn_assigned_txg = 0;
974 cv_broadcast(&dn->dn_notxholds);
975 }
976 mutex_exit(&dn->dn_mtx);
977 }
978
979 txg_rele_to_sync(&tx->tx_txgh);
980
981 tx->tx_lasttried_txg = tx->tx_txg;
982 tx->tx_txg = 0;
983 }
984
985 /*
986 * Assign tx to a transaction group; txg_how is a bitmask:
987 *
988 * If TXG_WAIT is set and the currently open txg is full, this function
989 * will wait until there's a new txg. This should be used when no locks
990 * are being held. With this bit set, this function will only fail if
991 * we're truly out of space (or over quota).
992 *
993 * If TXG_WAIT is *not* set and we can't assign into the currently open
994 * txg without blocking, this function will return immediately with
995 * ERESTART. This should be used whenever locks are being held. On an
996 * ERESTART error, the caller should drop all locks, call dmu_tx_wait(),
997 * and try again.
998 *
999 * If TXG_NOTHROTTLE is set, this indicates that this tx should not be
1000 * delayed due to the ZFS Write Throttle (see comments in dsl_pool.c for
1001 * details on the throttle). This is used by the VFS operations, after
1002 * they have already called dmu_tx_wait() (though most likely on a
1003 * different tx).
1004 */
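/*
 * Typical consumer patterns (an illustrative sketch only; os, object, off,
 * len, buf, throttled and the lock handling are placeholders, and error
 * handling is simplified).  With no locks held, TXG_WAIT is the simple case:
 *
 *	tx = dmu_tx_create(os);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	err = dmu_tx_assign(tx, TXG_WAIT);
 *	if (err != 0) {
 *		dmu_tx_abort(tx);
 *		return (err);
 *	}
 *	dmu_write(os, object, off, len, buf, tx);
 *	dmu_tx_commit(tx);
 *
 * With locks held, leave TXG_WAIT clear and retry on ERESTART, passing
 * TXG_NOTHROTTLE once dmu_tx_wait() has already been called:
 *
 * top:
 *	tx = dmu_tx_create(os);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	err = dmu_tx_assign(tx, throttled ? TXG_NOTHROTTLE : 0);
 *	if (err == ERESTART) {
 *		throttled = B_TRUE;
 *		... drop locks ...
 *		dmu_tx_wait(tx);
 *		dmu_tx_abort(tx);
 *		goto top;
 *	} else if (err != 0) {
 *		dmu_tx_abort(tx);
 *		return (err);
 *	}
 *	... modify the held range, then dmu_tx_commit(tx) ...
 */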
1005 int
1006 dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
1007 {
1008 int err;
1009
1010 ASSERT(tx->tx_txg == 0);
1011 ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE));
1012 ASSERT(!dsl_pool_sync_context(tx->tx_pool));
1013
1014 /* If we might wait, we must not hold the config lock. */
1015 IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool));
1016
1017 if ((txg_how & TXG_NOTHROTTLE))
1018 tx->tx_dirty_delayed = B_TRUE;
1019
1020 while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
1021 dmu_tx_unassign(tx);
1022
1023 if (err != ERESTART || !(txg_how & TXG_WAIT))
1024 return (err);
1025
1026 dmu_tx_wait(tx);
1027 }
1028
1029 txg_rele_to_quiesce(&tx->tx_txgh);
1030
1031 return (0);
1032 }
1033
1034 void
1035 dmu_tx_wait(dmu_tx_t *tx)
1036 {
1037 spa_t *spa = tx->tx_pool->dp_spa;
1038 dsl_pool_t *dp = tx->tx_pool;
1039
1040 ASSERT(tx->tx_txg == 0);
1041 ASSERT(!dsl_pool_config_held(tx->tx_pool));
1042
1043 if (tx->tx_wait_dirty) {
1044 /*
1045 * dmu_tx_try_assign() has determined that we need to wait
1046 * because we've consumed much or all of the dirty buffer
1047 * space.
1048 */
1049 mutex_enter(&dp->dp_lock);
1050 while (dp->dp_dirty_total >= zfs_dirty_data_max)
1051 cv_wait(&dp->dp_spaceavail_cv, &dp->dp_lock);
1052 uint64_t dirty = dp->dp_dirty_total;
1053 mutex_exit(&dp->dp_lock);
1054
1055 dmu_tx_delay(tx, dirty);
1056
1057 tx->tx_wait_dirty = B_FALSE;
1058
1059 /*
1060 * Note: setting tx_dirty_delayed only has effect if the
1061 * caller used TXG_WAIT. Otherwise they are going to
1062 * destroy this tx and try again. The common case,
1063 * zfs_write(), uses TXG_WAIT.
1064 */
1065 tx->tx_dirty_delayed = B_TRUE;
1066 } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
1067 /*
1068 * If the pool is suspended we need to wait until it
1069 * is resumed. Note that it's possible that the pool
1070 * has become active after this thread has tried to
1071 * obtain a tx. If that's the case then tx_lasttried_txg
1072 * would not have been set.
1073 */
1074 txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
1075 } else if (tx->tx_needassign_txh) {
1076 /*
1077 * A dnode is assigned to the quiescing txg. Wait for its
1078 * transaction to complete.
1079 */
1080 dnode_t *dn = tx->tx_needassign_txh->txh_dnode;
1081
1082 mutex_enter(&dn->dn_mtx);
1083 while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
1084 cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
1085 mutex_exit(&dn->dn_mtx);
1086 tx->tx_needassign_txh = NULL;
1087 } else {
1088 txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
1089 }
1090 }
1091
1092 static void
1093 dmu_tx_destroy(dmu_tx_t *tx)
1094 {
1095 dmu_tx_hold_t *txh;
1096
1097 while ((txh = list_head(&tx->tx_holds)) != NULL) {
1098 dnode_t *dn = txh->txh_dnode;
1099
1100 list_remove(&tx->tx_holds, txh);
1101 refcount_destroy_many(&txh->txh_space_towrite,
1102 refcount_count(&txh->txh_space_towrite));
1103 refcount_destroy_many(&txh->txh_memory_tohold,
1104 refcount_count(&txh->txh_memory_tohold));
1105 kmem_free(txh, sizeof (dmu_tx_hold_t));
1106 if (dn != NULL)
1107 dnode_rele(dn, tx);
1108 }
1124 for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
1125 txh = list_next(&tx->tx_holds, txh)) {
1126 dnode_t *dn = txh->txh_dnode;
1127
1128 if (dn == NULL)
1129 continue;
1130
1131 mutex_enter(&dn->dn_mtx);
1132 ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
1133
1134 if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
1135 dn->dn_assigned_txg = 0;
1136 cv_broadcast(&dn->dn_notxholds);
1137 }
1138 mutex_exit(&dn->dn_mtx);
1139 }
1140
1141 if (tx->tx_tempreserve_cookie)
1142 dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
1143
1144 if (!list_is_empty(&tx->tx_callbacks))
1145 txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);
1146
1147 if (tx->tx_anyobj == FALSE)
1148 txg_rele_to_sync(&tx->tx_txgh);
1149
1150 dmu_tx_destroy(tx);
1151 }
1152
1153 void
1154 dmu_tx_abort(dmu_tx_t *tx)
1155 {
1156 ASSERT(tx->tx_txg == 0);
1157
1158 /*
1159 * Call any registered callbacks with an error code.
1160 */
1161 if (!list_is_empty(&tx->tx_callbacks))
1162 dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
1163
1164 dmu_tx_destroy(tx);
1165 }
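/*
 * A minimal sketch of the commit-callback contract used above (assuming the
 * dmu_tx_callback_register() interface declared in dmu.h; my_cb and arg are
 * placeholders):
 *
 *	static void
 *	my_cb(void *arg, int error)
 *	{
 *		... error is 0 once the assigned txg has synced,
 *		    or ECANCELED if the tx was aborted ...
 *	}
 *
 *	dmu_tx_callback_register(tx, my_cb, arg);
 *
 * Callbacks registered on a tx that is later aborted are invoked by
 * dmu_tx_abort() with ECANCELED; otherwise dmu_tx_commit() hands them to
 * the txg machinery and they run after that txg syncs.
 */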
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 */
27
28 #include <sys/dmu.h>
29 #include <sys/dmu_impl.h>
30 #include <sys/dbuf.h>
31 #include <sys/dmu_tx.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dsl_dataset.h>
34 #include <sys/dsl_dir.h>
35 #include <sys/dsl_pool.h>
36 #include <sys/zap_impl.h>
37 #include <sys/spa.h>
38 #include <sys/sa.h>
39 #include <sys/sa_impl.h>
40 #include <sys/zfs_context.h>
41 #include <sys/varargs.h>
42
43 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
284 }
285
286 void
287 dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
288 {
289 dmu_tx_hold_t *txh;
290
291 ASSERT0(tx->tx_txg);
292 ASSERT3U(len, <=, DMU_MAX_ACCESS);
293 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
294
295 txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
296 object, THT_WRITE, off, len);
297 if (txh != NULL) {
298 dmu_tx_count_write(txh, off, len);
299 dmu_tx_count_dnode(txh);
300 }
301 }
302
303 void
304 dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
305 {
306 dmu_tx_hold_t *txh;
307
308 ASSERT0(tx->tx_txg);
309 ASSERT3U(len, <=, DMU_MAX_ACCESS);
310 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
311
312 txh = dmu_tx_hold_dnode_impl(tx, dn, THT_WRITE, off, len);
313 if (txh != NULL) {
314 dmu_tx_count_write(txh, off, len);
315 dmu_tx_count_dnode(txh);
316 }
317 }
318
319 /*
320 * This function marks the transaction as being a "net free". The end
321 * result is that refquotas will be disabled for this transaction, and
322 * this transaction will be able to use half of the pool space overhead
323 * (see dsl_pool_adjustedsize()). Therefore this function should only
835 *
836 * - dd_tempreserved[], which is the sum of in-flight transactions'
837 * holds' txh_space_towrite (i.e. those transactions that have called
838 * dmu_tx_assign() but not yet called dmu_tx_commit()).
839 *
840 * - dd_space_towrite[], which is the amount of space consumed by dirtied dbufs.
841 *
842 * Note that all of these values are inflated by spa_get_worst_case_asize(),
843 * which means that we may get ERESTART well before we are actually in danger
844 * of running out of space, but this also mitigates any small inaccuracies
845 * in the rough estimate (e.g. txh_space_towrite doesn't take into account
846 * indirect blocks, and dd_space_towrite[] doesn't take into account changes
847 * to the MOS).
848 *
849 * Note that due to this algorithm, it is possible to exceed the allowed
850 * usage by one transaction. Also, as we approach the allowed usage,
851 * we will allow a very limited amount of changes into each TXG, thus
852 * decreasing performance.
853 */
854 static int
855 dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
856 {
857 spa_t *spa = tx->tx_pool->dp_spa;
858
859 ASSERT0(tx->tx_txg);
860
861 if (tx->tx_err)
862 return (tx->tx_err);
863
864 if (spa_suspended(spa)) {
865 /*
866 * If the user has indicated a blocking failure mode,
867 * then return ERESTART, which will block in dmu_tx_wait().
868 * Otherwise, return EIO so that an error can get
869 * propagated back to the VOP calls.
870 *
871 * Note that we always honor the txg_how flag regardless
872 * of the failuremode setting.
873 */
874 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
875 txg_how != TXG_WAIT)
876 return (SET_ERROR(EIO));
877
878 return (SET_ERROR(ERESTART));
879 }
880
881 if (!tx->tx_waited &&
882 dsl_pool_need_dirty_delay(tx->tx_pool)) {
883 tx->tx_wait_dirty = B_TRUE;
884 return (SET_ERROR(ERESTART));
885 }
886
887 tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
888 tx->tx_needassign_txh = NULL;
889
890 /*
891 * NB: No error returns are allowed after txg_hold_open, but
892 * before processing the dnode holds, due to the
893 * dmu_tx_unassign() logic.
894 */
895
896 uint64_t towrite = 0;
897 uint64_t tohold = 0;
898 for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
899 txh = list_next(&tx->tx_holds, txh)) {
900 dnode_t *dn = txh->txh_dnode;
901 if (dn != NULL) {
949
950 if (dn == NULL)
951 continue;
952 mutex_enter(&dn->dn_mtx);
953 ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
954
955 if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
956 dn->dn_assigned_txg = 0;
957 cv_broadcast(&dn->dn_notxholds);
958 }
959 mutex_exit(&dn->dn_mtx);
960 }
961
962 txg_rele_to_sync(&tx->tx_txgh);
963
964 tx->tx_lasttried_txg = tx->tx_txg;
965 tx->tx_txg = 0;
966 }
967
968 /*
969 * Assign tx to a transaction group. txg_how can be one of:
970 *
971 * (1) TXG_WAIT. If the current open txg is full, waits until there's
972 * a new one. This should be used when you're not holding locks.
973 * It will only fail if we're truly out of space (or over quota).
974 *
975 * (2) TXG_NOWAIT. If we can't assign into the current open txg without
976 * blocking, returns immediately with ERESTART. This should be used
977 * whenever you're holding locks. On an ERESTART error, the caller
978 * should drop locks, do a dmu_tx_wait(tx), and try again.
979 *
980 * (3) TXG_WAITED. Like TXG_NOWAIT, but indicates that dmu_tx_wait()
981 * has already been called on behalf of this operation (though
982 * most likely on a different tx).
983 */
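/*
 * Typical consumer pattern for this interface (an illustrative sketch only;
 * os, object, off, len, waited and the lock handling are placeholders):
 *
 * top:
 *	tx = dmu_tx_create(os);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	err = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 *	if (err == ERESTART) {
 *		waited = B_TRUE;
 *		... drop locks ...
 *		dmu_tx_wait(tx);
 *		dmu_tx_abort(tx);
 *		goto top;
 *	} else if (err != 0) {
 *		dmu_tx_abort(tx);
 *		return (err);
 *	}
 *	... modify the held range, then dmu_tx_commit(tx) ...
 */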
984 int
985 dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
986 {
987 int err;
988
989 ASSERT(tx->tx_txg == 0);
990 ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT ||
991 txg_how == TXG_WAITED);
992 ASSERT(!dsl_pool_sync_context(tx->tx_pool));
993
994 /* If we might wait, we must not hold the config lock. */
995 ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
996
997 if (txg_how == TXG_WAITED)
998 tx->tx_waited = B_TRUE;
999
1000 while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
1001 dmu_tx_unassign(tx);
1002
1003 if (err != ERESTART || txg_how != TXG_WAIT)
1004 return (err);
1005
1006 dmu_tx_wait(tx);
1007 }
1008
1009 txg_rele_to_quiesce(&tx->tx_txgh);
1010
1011 return (0);
1012 }
1013
1014 void
1015 dmu_tx_wait(dmu_tx_t *tx)
1016 {
1017 spa_t *spa = tx->tx_pool->dp_spa;
1018 dsl_pool_t *dp = tx->tx_pool;
1019
1020 ASSERT(tx->tx_txg == 0);
1021 ASSERT(!dsl_pool_config_held(tx->tx_pool));
1022
1023 if (tx->tx_wait_dirty) {
1024 /*
1025 * dmu_tx_try_assign() has determined that we need to wait
1026 * because we've consumed much or all of the dirty buffer
1027 * space.
1028 */
1029 mutex_enter(&dp->dp_lock);
1030 while (dp->dp_dirty_total >= zfs_dirty_data_max)
1031 cv_wait(&dp->dp_spaceavail_cv, &dp->dp_lock);
1032 uint64_t dirty = dp->dp_dirty_total;
1033 mutex_exit(&dp->dp_lock);
1034
1035 dmu_tx_delay(tx, dirty);
1036
1037 tx->tx_wait_dirty = B_FALSE;
1038
1039 /*
1040 * Note: setting tx_waited only has effect if the caller
1041 * used TXG_WAIT. Otherwise they are going to destroy
1042 * this tx and try again. The common case, zfs_write(),
1043 * uses TXG_WAIT.
1044 */
1045 tx->tx_waited = B_TRUE;
1046 } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
1047 /*
1048 * If the pool is suspended we need to wait until it
1049 * is resumed. Note that it's possible that the pool
1050 * has become active after this thread has tried to
1051 * obtain a tx. If that's the case then tx_lasttried_txg
1052 * would not have been set.
1053 */
1054 txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
1055 } else if (tx->tx_needassign_txh) {
1056 /*
1057 * A dnode is assigned to the quiescing txg. Wait for its
1058 * transaction to complete.
1059 */
1060 dnode_t *dn = tx->tx_needassign_txh->txh_dnode;
1061
1062 mutex_enter(&dn->dn_mtx);
1063 while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
1064 cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
1065 mutex_exit(&dn->dn_mtx);
1066 tx->tx_needassign_txh = NULL;
1067 } else {
1068 /*
1069 * If we have a lot of dirty data, just wait until we sync
1070 * out a TXG, at which point we'll hopefully have synced
1071 * a portion of the changes.
1072 */
1073 txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
1074 }
1075 }
1076
1077 static void
1078 dmu_tx_destroy(dmu_tx_t *tx)
1079 {
1080 dmu_tx_hold_t *txh;
1081
1082 while ((txh = list_head(&tx->tx_holds)) != NULL) {
1083 dnode_t *dn = txh->txh_dnode;
1084
1085 list_remove(&tx->tx_holds, txh);
1086 refcount_destroy_many(&txh->txh_space_towrite,
1087 refcount_count(&txh->txh_space_towrite));
1088 refcount_destroy_many(&txh->txh_memory_tohold,
1089 refcount_count(&txh->txh_memory_tohold));
1090 kmem_free(txh, sizeof (dmu_tx_hold_t));
1091 if (dn != NULL)
1092 dnode_rele(dn, tx);
1093 }
1109 for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
1110 txh = list_next(&tx->tx_holds, txh)) {
1111 dnode_t *dn = txh->txh_dnode;
1112
1113 if (dn == NULL)
1114 continue;
1115
1116 mutex_enter(&dn->dn_mtx);
1117 ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
1118
1119 if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
1120 dn->dn_assigned_txg = 0;
1121 cv_broadcast(&dn->dn_notxholds);
1122 }
1123 mutex_exit(&dn->dn_mtx);
1124 }
1125
1126 if (tx->tx_tempreserve_cookie)
1127 dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
1128
1129 if (!list_is_empty(&tx->tx_callbacks)) {
1130 if (dmu_tx_is_syncing(tx)) {
1131 txg_register_callbacks_sync(tx->tx_pool,
1132 tx->tx_txg, &tx->tx_callbacks);
1133 } else {
1134 txg_register_callbacks(&tx->tx_txgh,
1135 &tx->tx_callbacks);
1136 }
1137 }
1138
1139 if (tx->tx_anyobj == FALSE)
1140 txg_rele_to_sync(&tx->tx_txgh);
1141
1142 dmu_tx_destroy(tx);
1143 }
1144
1145 void
1146 dmu_tx_abort(dmu_tx_t *tx)
1147 {
1148 ASSERT(tx->tx_txg == 0);
1149
1150 /*
1151 * Call any registered callbacks with an error code.
1152 */
1153 if (!list_is_empty(&tx->tx_callbacks))
1154 dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
1155
1156 dmu_tx_destroy(tx);
1157 }