Print this page
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-7603 Back port OpenZFS #188 Create tunable to ignore hole_birth
feature
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-4582 update wrc test cases to allow using write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-4207 WRC and dedup on the same pool cause system-panic
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-4193 WRC does not migrate data that belong to intermediate snapshots
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-3710 WRC improvements and bug-fixes
 * refactored WRC move-logic to use zio kmem_caches
 * replace size and compression fields with a blk_prop field
   (the same as in blkptr_t) to slightly reduce the size of
   wrc_block_t, and use macros similar to those for blkptr_t
   to get PSIZE, LSIZE and COMPRESSION
 * make the CPU happier by reducing atomic calls
 * removed unused code
 * fixed naming of variables
 * fixed a possible system panic after restarting the system
   with WRC enabled
 * fixed a race that causes system panic
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3558 KRRP Integration
4459 Typo in itadm(1m) usage message: delete-inititator
Reviewed by: Milan Jurik <milan.jurik@xylab.cz>
Reviewed by: Marcel Telka <marcel@telka.sk>
Approved by: Robert Mustacchi <rm@joyent.com>
4504 traverse_visitbp: visit DMU_GROUPUSED_OBJECT before DMU_USERUSED_OBJECT
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Andriy Gapon <andriy.gapon@hybridcluster.com>
Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>
4391 panic system rather than corrupting pool if we hit bug 4390
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
re #12619 rb4429 More dp->dp_config_rwlock holds
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties
        
@@ -18,10 +18,11 @@
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/dmu_objset.h>
@@ -54,10 +55,11 @@
 typedef struct traverse_data {
         spa_t *td_spa;
         uint64_t td_objset;
         blkptr_t *td_rootbp;
         uint64_t td_min_txg;
+        uint64_t td_max_txg;
         zbookmark_phys_t *td_resume;
         int td_flags;
         prefetch_data_t *td_pfd;
         boolean_t td_paused;
         uint64_t td_hole_birth_enabled_txg;
@@ -189,11 +191,12 @@
          * some children will not be needed (and in fact may have already
          * been freed).
          */
         if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
                 return;
-        if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
+        if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg ||
+            bp->blk_birth >= td->td_max_txg)
                 return;
         if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
                 return;
 
         (void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
@@ -255,13 +258,15 @@
                  * Note that the meta-dnode cannot be reallocated.
                  */
                 if (!send_holes_without_birth_time &&
                     (!td->td_realloc_possible ||
                     zb->zb_object == DMU_META_DNODE_OBJECT) &&
-                    td->td_hole_birth_enabled_txg <= td->td_min_txg)
+                    (td->td_hole_birth_enabled_txg <= td->td_min_txg ||
+                    td->td_hole_birth_enabled_txg > td->td_max_txg))
                         return (0);
-        } else if (bp->blk_birth <= td->td_min_txg) {
+        } else if (bp->blk_birth <= td->td_min_txg ||
+            bp->blk_birth >= td->td_max_txg) {
                 return (0);
         }
 
         if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
                 uint64_t size = BP_GET_LSIZE(bp);
@@ -284,10 +289,13 @@
         if (td->td_flags & TRAVERSE_PRE) {
                 err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
                     td->td_arg);
                 if (err == TRAVERSE_VISIT_NO_CHILDREN)
                         return (0);
+                /* handle pausing at a common point */
+                if (err == ERESTART)
+                        td->td_paused = B_TRUE;
                 if (err != 0)
                         goto post;
         }
 
         if (BP_GET_LEVEL(bp) > 0) {
@@ -418,10 +426,16 @@
                             (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
                 }
                 td->td_paused = B_TRUE;
         }
 
+        /* if we walked over all bps, the bookmark must be cleared */
+        if (!err && !td->td_paused && td->td_resume != NULL &&
+            bp == td->td_rootbp && td->td_pfd != NULL) {
+                bzero(td->td_resume, sizeof (*td->td_resume));
+        }
+
         return (err);
 }
 
 static void
 prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
@@ -544,12 +558,12 @@
  * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
  * in syncing context).
  */
 static int
 traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
-    uint64_t txg_start, zbookmark_phys_t *resume, int flags,
-    blkptr_cb_t func, void *arg)
+    uint64_t txg_start, uint64_t txg_finish, zbookmark_phys_t *resume,
+    int flags, blkptr_cb_t func, void *arg)
 {
         traverse_data_t td;
         prefetch_data_t pd = { 0 };
         zbookmark_phys_t czb;
         int err;
@@ -559,10 +573,11 @@
 
         td.td_spa = spa;
         td.td_objset = objset;
         td.td_rootbp = rootbp;
         td.td_min_txg = txg_start;
+        td.td_max_txg = txg_finish;
         td.td_resume = resume;
         td.td_func = func;
         td.td_arg = arg;
         td.td_pfd = &pd;
         td.td_flags = flags;
@@ -629,11 +644,12 @@
 traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
     zbookmark_phys_t *resume,
     int flags, blkptr_cb_t func, void *arg)
 {
         return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
-            &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
+            &dsl_dataset_phys(ds)->ds_bp, txg_start, UINT64_MAX, resume, flags,
+            func, arg));
 }
 
 int
 traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
     int flags, blkptr_cb_t func, void *arg)
@@ -645,33 +661,36 @@
 traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
     uint64_t txg_start, zbookmark_phys_t *resume, int flags,
     blkptr_cb_t func, void *arg)
 {
         return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
-            blkptr, txg_start, resume, flags, func, arg));
+            blkptr, txg_start, UINT64_MAX, resume, flags, func, arg));
 }
 
 /*
  * NB: pool must not be changing on-disk (eg, from zdb or sync context).
  */
 int
-traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
-    blkptr_cb_t func, void *arg)
+traverse_pool(spa_t *spa, uint64_t txg_start, uint64_t txg_finish, int flags,
+    blkptr_cb_t func, void *arg, zbookmark_phys_t *zb)
 {
-        int err;
+        int err = 0, lasterr = 0;
         dsl_pool_t *dp = spa_get_dsl(spa);
         objset_t *mos = dp->dp_meta_objset;
         boolean_t hard = (flags & TRAVERSE_HARD);
 
         /* visit the MOS */
+        if (!zb || (zb->zb_objset == 0 && zb->zb_object == 0)) {
         err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
-            txg_start, NULL, flags, func, arg);
+                    txg_start, txg_finish, NULL, flags, func, arg);
         if (err != 0)
                 return (err);
+        }
 
         /* visit each dataset */
-        for (uint64_t obj = 1; err == 0;
+        for (uint64_t obj = (zb && !ZB_IS_ZERO(zb))? zb->zb_objset : 1;
+            err == 0 || (err != ESRCH && hard);
             err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
                 dmu_object_info_t doi;
 
                 err = dmu_object_info(mos, obj, &doi);
                 if (err != 0) {
@@ -680,27 +699,73 @@
                         break;
                 }
 
                 if (doi.doi_bonus_type == DMU_OT_DSL_DATASET) {
                         dsl_dataset_t *ds;
+                        objset_t *os;
+                        boolean_t os_is_snapshot = B_FALSE;
                         uint64_t txg = txg_start;
+                        uint64_t ctxg;
+                        uint64_t max_txg = txg_finish;
 
                         dsl_pool_config_enter(dp, FTAG);
                         err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
                         dsl_pool_config_exit(dp, FTAG);
                         if (err != 0) {
                                 if (hard)
                                         continue;
                                 break;
                         }
-                        if (dsl_dataset_phys(ds)->ds_prev_snap_txg > txg)
+
+                        dsl_pool_config_enter(dp, FTAG);
+                        err = dmu_objset_from_ds(ds, &os);
+                        if (err == 0)
+                                os_is_snapshot = dmu_objset_is_snapshot(os);
+
+                        dsl_pool_config_exit(dp, FTAG);
+                        if (err != 0) {
+                                dsl_dataset_rele(ds, FTAG);
+                                if (hard)
+                                        continue;
+                                break;
+                        }
+                        ctxg = dsl_dataset_phys(ds)->ds_creation_txg;
+
+                        /* upper-limited traverse walks over snapshots only */
+                        if (max_txg != UINT64_MAX && !os_is_snapshot) {
+                                dsl_dataset_rele(ds, FTAG);
+                                continue;
+                        }
+                        if (max_txg != UINT64_MAX && ctxg >= max_txg) {
+                                dsl_dataset_rele(ds, FTAG);
+                                continue;
+                        }
+                        if (os_is_snapshot && ctxg <= txg_start) {
+                                dsl_dataset_rele(ds, FTAG);
+                                continue;
+                        }
+                        if (max_txg == UINT64_MAX &&
+                            dsl_dataset_phys(ds)->ds_prev_snap_txg > txg)
                                 txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
-                        err = traverse_dataset(ds, txg, flags, func, arg);
+                        if (txg > max_txg)
+                                max_txg = txg;
+                        err = traverse_impl(spa, ds, ds->ds_object,
+                            &dsl_dataset_phys(ds)->ds_bp,
+                            txg, max_txg, zb, flags, func, arg);
                         dsl_dataset_rele(ds, FTAG);
-                        if (err != 0)
+                        if (err != 0) {
+                                if (!hard)
+                                        return (err);
+                                lasterr = err;
+                        }
+                        if (zb && !ZB_IS_ZERO(zb))
                                 break;
                 }
         }
-        if (err == ESRCH)
+        if (err == ESRCH) {
+                /* zero bookmark means we are done */
+                if (zb)
+                        bzero(zb, sizeof (*zb));
                 err = 0;
-        return (err);
+        }
+        return (err != 0 ? err : lasterr);
 }