Print this page
NEX-13374 NDMP should be able to backup unmounted ZFS filesystems
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5801 Snapshots left over after failed backups
Reviewed by: Rick Mesta <rick.mesta@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Revert "NEX-5801 Snapshots left over after failed backups"
This reverts commit f182fb95f09036db71fbfc6f0a6b90469b761f21.
NEX-5801 Snapshots left over after failed backups
Reviewed by: Rick Mesta <rick.mesta@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-2911 NDMP logging should use syslog and is too chatty

@@ -1,10 +1,11 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2016 Martin Matuska. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
  */
 
 /*
  * BSD 3 Clause License
  *

@@ -37,316 +38,294 @@
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <syslog.h>
 #include <stdio.h>
 #include <string.h>
+#include <sys/mount.h>
 #include "ndmpd.h"
 #include <libzfs.h>
 
-typedef struct snap_param {
-        char *snp_name;
-        boolean_t snp_found;
-} snap_param_t;
-
-static int cleanup_fd = -1;
-
 /*
- * ndmp_has_backup
- *
- * Call backup function which looks for backup snapshot.
- * This is a callback function used with zfs_iter_snapshots.
- *
- * Parameters:
- *   zhp (input) - ZFS handle pointer
- *   data (output) - 0 - no backup snapshot
- *                   1 - has backup snapshot
- *
- * Returns:
- *   0: on success
- *  -1: otherwise
- */
-static int
-ndmp_has_backup(zfs_handle_t *zhp, void *data)
-{
-        const char *name;
-        snap_param_t *chp = (snap_param_t *)data;
-
-        name = zfs_get_name(zhp);
-        if (name == NULL ||
-            strstr(name, chp->snp_name) == NULL) {
-                zfs_close(zhp);
-                return (-1);
-        }
-
-        chp->snp_found = 1;
-        zfs_close(zhp);
-
-        return (0);
-}
-
-/*
- * ndmp_has_backup_snapshot
- *
- * Returns TRUE if the volume has an active backup snapshot, otherwise,
- * returns FALSE.
- *
- * Parameters:
- *   volname (input) - name of the volume
- *
- * Returns:
- *   0: on success
- *  -1: otherwise
- */
-static int
-ndmp_has_backup_snapshot(char *volname, char *jobname)
-{
-        zfs_handle_t *zhp;
-        snap_param_t snp;
-        char chname[ZFS_MAX_DATASET_NAME_LEN];
-
-        (void) mutex_lock(&zlib_mtx);
-        if ((zhp = zfs_open(zlibh, volname, ZFS_TYPE_DATASET)) == 0) {
-                NDMP_LOG(LOG_ERR, "Cannot open snapshot %s.", volname);
-                (void) mutex_unlock(&zlib_mtx);
-                return (-1);
-        }
-
-        snp.snp_found = 0;
-        (void) snprintf(chname, ZFS_MAX_DATASET_NAME_LEN, "@%s", jobname);
-        snp.snp_name = chname;
-
-        (void) zfs_iter_snapshots(zhp, B_FALSE, ndmp_has_backup, &snp);
-        zfs_close(zhp);
-        (void) mutex_unlock(&zlib_mtx);
-
-        return (snp.snp_found);
-}
-
-/*
- * ndmp_create_snapshot
- *
- * This function will parse the path to get the real volume name.
- * It will then create a snapshot based on volume and job name.
- * This function should be called before the NDMP backup is started.
- *
- * Parameters:
- *   vol_name (input) - name of the volume
- *
- * Returns:
- *   0: on success
- *   -1: otherwise
- */
-int
-ndmp_create_snapshot(char *vol_name, char *jname)
-{
-        char vol[ZFS_MAX_DATASET_NAME_LEN];
-
-        if (vol_name == 0 ||
-            get_zfsvolname(vol, sizeof (vol), vol_name) == -1)
-                return (0);
-
-        /*
-         * If there is an old snapshot left from the previous
-         * backup it could be stale one and it must be
-         * removed before using it.
-         */
-        if (ndmp_has_backup_snapshot(vol, jname))
-                (void) snapshot_destroy(vol, jname, B_FALSE, B_TRUE, NULL);
-
-        return (snapshot_create(vol, jname, B_FALSE, B_TRUE));
-}
-
-/*
- * ndmp_remove_snapshot
- *
- * This function will parse the path to get the real volume name.
- * It will then remove the snapshot for that volume and job name.
- * This function should be called after NDMP backup is finished.
- *
- * Parameters:
- *   vol_name (input) - name of the volume
- *
- * Returns:
- *   0: on success
- *   -1: otherwise
- */
-int
-ndmp_remove_snapshot(char *vol_name, char *jname)
-{
-        char vol[ZFS_MAX_DATASET_NAME_LEN];
-
-        if (vol_name == 0 ||
-            get_zfsvolname(vol, sizeof (vol), vol_name) == -1)
-                return (0);
-
-        return (snapshot_destroy(vol, jname, B_FALSE, B_TRUE, NULL));
-}
-
-/*
  * Put a hold on snapshot
  */
 int
-snapshot_hold(char *volname, char *snapname, char *jname, boolean_t recursive)
+snapshot_hold(char *volname, char *snapname, char *jname)
 {
         zfs_handle_t *zhp;
         char *p;
 
         if ((zhp = zfs_open(zlibh, volname, ZFS_TYPE_DATASET)) == 0) {
-                NDMP_LOG(LOG_ERR, "Cannot open volume %s.", volname);
+                syslog(LOG_ERR, "Cannot open volume %s.", volname);
                 return (-1);
         }
-
-        if (cleanup_fd == -1 && (cleanup_fd = open(ZFS_DEV,
-            O_RDWR|O_EXCL)) < 0) {
-                NDMP_LOG(LOG_ERR, "Cannot open dev %d", errno);
-                zfs_close(zhp);
-                return (-1);
-        }
-
         p = strchr(snapname, '@') + 1;
-        if (zfs_hold(zhp, p, jname, recursive, cleanup_fd) != 0) {
-                NDMP_LOG(LOG_ERR, "Cannot hold snapshot %s", p);
+        /*
+         * The -1 tells the lower levels there are no snapshots
+         * to clean up.
+         */
+        if (zfs_hold(zhp, p, jname, B_FALSE, -1) != 0) {
+                syslog(LOG_ERR, "Cannot hold snapshot %s", p);
                 zfs_close(zhp);
                 return (-1);
         }
         zfs_close(zhp);
         return (0);
 }
 
 int
-snapshot_release(char *volname, char *snapname, char *jname,
-    boolean_t recursive)
+snapshot_release(char *volname, char *snapname, char *jname)
 {
         zfs_handle_t *zhp;
         char *p;
         int rv = 0;
 
         if ((zhp = zfs_open(zlibh, volname, ZFS_TYPE_DATASET)) == 0) {
-                NDMP_LOG(LOG_ERR, "Cannot open volume %s", volname);
+                syslog(LOG_ERR, "Cannot open volume %s", volname);
                 return (-1);
         }
 
         p = strchr(snapname, '@') + 1;
-        if (zfs_release(zhp, p, jname, recursive) != 0) {
-                NDMP_LOG(LOG_DEBUG, "Cannot release snapshot %s", p);
+        if (zfs_release(zhp, p, jname, B_FALSE) != 0) {
+                syslog(LOG_DEBUG, "Cannot release snapshot %s", p);
                 rv = -1;
         }
-        if (cleanup_fd != -1) {
-                (void) close(cleanup_fd);
-                cleanup_fd = -1;
-        }
         zfs_close(zhp);
         return (rv);
 }
 
 /*
- * Create a snapshot on the volume
+ * Create a snapshot, put a hold on it, clone it, and mount it in a
+ * well-known location so the backup process can traverse its
+ * directory tree structure.
  */
 int
-snapshot_create(char *volname, char *jname, boolean_t recursive,
-    boolean_t hold)
+backup_dataset_create(ndmp_lbr_params_t *nlp)
 {
-        char snapname[ZFS_MAX_DATASET_NAME_LEN];
+        char zpoolname[ZFS_MAX_DATASET_NAME_LEN];
+        char *slash;
         int rv;
 
-        if (!volname || !*volname)
+        if (nlp == NULL) {
                 return (-1);
+        }
 
-        (void) snprintf(snapname, ZFS_MAX_DATASET_NAME_LEN,
-            "%s@%s", volname, jname);
+        (void) strlcpy(zpoolname, nlp->nlp_vol, sizeof (zpoolname));
+        /*
+         * Pull out the pool name component from the volname
+         * to use it to build snapshot and clone names.
+         */
+        slash = strchr(zpoolname, '/');
+        if (slash != NULL) {
+                *slash = '\0';
+        }
 
+        (void) snprintf(nlp->nlp_clonename, sizeof (nlp->nlp_clonename),
+            "%s/%s", zpoolname, nlp->nlp_job_name);
+
         (void) mutex_lock(&zlib_mtx);
-        if ((rv = zfs_snapshot(zlibh, snapname, recursive, NULL))
-            == -1) {
+
+        /*
+         * If "checkpoint" is not enabled, create the normal
+         * snapshot and continue normal backup.  If it is
+         * enabled, the "checkpoint" name has been already set
+         * so we just have to clone it.
+         */
+        if (!NLP_ISCHKPNTED(nlp)) {
+                (void) snprintf(nlp->nlp_snapname, sizeof (nlp->nlp_snapname),
+                    "%s@%s", nlp->nlp_vol, nlp->nlp_job_name);
+
+                if ((rv = zfs_snapshot(zlibh, nlp->nlp_snapname,
+                    B_FALSE, NULL)) != 0) {
                 if (errno == EEXIST) {
                         (void) mutex_unlock(&zlib_mtx);
                         return (0);
                 }
-                NDMP_LOG(LOG_DEBUG,
-                    "snapshot_create: %s failed (err=%d): %s",
-                    snapname, errno, libzfs_error_description(zlibh));
+                        syslog(LOG_ERR,
+                            "backup_dataset_create: %s failed (err=%d): %s",
+                            nlp->nlp_snapname, errno,
+                            libzfs_error_description(zlibh));
                 (void) mutex_unlock(&zlib_mtx);
                 return (rv);
         }
-        if (hold && snapshot_hold(volname, snapname, jname, recursive) != 0) {
-                NDMP_LOG(LOG_DEBUG,
-                    "snapshot_create: %s hold failed (err=%d): %s",
-                    snapname, errno, libzfs_error_description(zlibh));
+                if (snapshot_hold(nlp->nlp_vol,
+                    nlp->nlp_snapname, NDMP_RCF_BASENAME) != 0) {
+                        syslog(LOG_DEBUG,
+                            "backup_dataset_create: %s "
+                            "hold failed (err=%d): %s",
+                            nlp->nlp_snapname,
+                            errno, libzfs_error_description(zlibh));
                 (void) mutex_unlock(&zlib_mtx);
                 return (-1);
         }
+                syslog(LOG_DEBUG,
+                    "Using %s NdmpBackup snapshot for backup",
+                    nlp->nlp_snapname);
 
+        }
+
+        if (ndmp_clone_snapshot(nlp) != 0) {
+                syslog(LOG_ERR,
+                    "backup_dataset_create: %s clone failed (err=%d): %s",
+                    nlp->nlp_snapname, errno, libzfs_error_description(zlibh));
         (void) mutex_unlock(&zlib_mtx);
+                return (-1);
+        }
+        (void) mutex_unlock(&zlib_mtx);
         return (0);
 }
 
 /*
- * Remove and release the backup snapshot
+ * Unmount, release, and destroy the snapshot created for backup.
  */
 int
-snapshot_destroy(char *volname, char *jname, boolean_t recursive,
-    boolean_t hold, int *zfs_err)
+backup_dataset_destroy(ndmp_lbr_params_t *nlp)
 {
-        char snapname[ZFS_MAX_DATASET_NAME_LEN];
-        zfs_handle_t *zhp;
-        zfs_type_t ztype;
-        char *namep;
+        char zpoolname[ZFS_MAX_DATASET_NAME_LEN];
+        char *slash;
+        zfs_handle_t *vol_zhp;
+        zfs_handle_t *cln_zhp;
         int err;
+        int rv = 0;
 
-        if (zfs_err)
-                *zfs_err = 0;
-
-        if (!volname || !*volname)
+        if (nlp == NULL) {
+                syslog(LOG_DEBUG,
+                    "nlp NULL in backup_dataset_destroy");
                 return (-1);
+        }
 
-        if (recursive) {
-                ztype = ZFS_TYPE_VOLUME | ZFS_TYPE_FILESYSTEM;
-                namep = volname;
-        } else {
-                (void) snprintf(snapname, ZFS_MAX_DATASET_NAME_LEN,
-                    "%s@%s", volname, jname);
-                namep = snapname;
-                ztype = ZFS_TYPE_SNAPSHOT;
+        (void) strlcpy(zpoolname, nlp->nlp_vol, sizeof (zpoolname));
+        slash = strchr(zpoolname, '/');
+        if (slash != NULL) {
+                *slash = '\0';
         }
 
+        if (!NLP_ISCHKPNTED(nlp)) {
+                (void) snprintf(nlp->nlp_snapname, sizeof (nlp->nlp_snapname),
+                    "%s@%s", nlp->nlp_vol, nlp->nlp_job_name);
+        }
+
+
+        syslog(LOG_DEBUG, "Snapname in backup_dataset_destroy is [%s]",
+            nlp->nlp_snapname);
+
+        /*
+         * Destroy using this sequence
+         * zfs release <pool>/<volume>@<jname>
+         * zfs destroy <pool>/<jname>
+         * zfs destroy <pool>/<volume>@<jname>
+         */
         (void) mutex_lock(&zlib_mtx);
-        if (hold &&
-            snapshot_release(volname, namep, jname, recursive) != 0) {
-                NDMP_LOG(LOG_DEBUG,
-                    "snapshot_destroy: %s release failed (err=%d): %s",
-                    namep, errno, libzfs_error_description(zlibh));
+
+        /*
+         * Release the normal snapshot but don't try to
+         * release if it's a "checkpoint" because the hold
+         * wasn't put on it to begin with.
+         */
+        if (!NLP_ISCHKPNTED(nlp)) {
+                if (snapshot_release(nlp->nlp_vol,
+                    nlp->nlp_snapname, NDMP_RCF_BASENAME) != 0) {
+                        syslog(LOG_DEBUG,
+                            "backup_dataset_destroy: %s "
+                            "release failed (err=%d): %s",
+                            nlp->nlp_clonename, errno,
+                            libzfs_error_description(zlibh));
                 (void) mutex_unlock(&zlib_mtx);
                 return (-1);
         }
+        } else {
+                syslog(LOG_DEBUG, "Checkpointed dataset not held "
+                    "will not release [%s]", nlp->nlp_snapname);
+        }
 
-        if ((zhp = zfs_open(zlibh, namep, ztype)) == NULL) {
-                NDMP_LOG(LOG_DEBUG, "snapshot_destroy: open %s failed",
-                    namep);
+        /*
+         * Open the clone to get descriptor
+         */
+        if ((cln_zhp = zfs_open(zlibh, nlp->nlp_clonename,
+            ZFS_TYPE_VOLUME | ZFS_TYPE_FILESYSTEM)) == NULL) {
+                syslog(LOG_ERR,
+                    "backup_dataset_destroy: open %s failed",
+                    nlp->nlp_clonename);
                 (void) mutex_unlock(&zlib_mtx);
                 return (-1);
         }
 
-        if (recursive) {
-                err = zfs_destroy_snaps(zhp, jname, B_TRUE);
-        } else {
-                err = zfs_destroy(zhp, B_TRUE);
+        /*
+         * Open the volume to get descriptor for destroying its snapshot
+         */
+        if ((vol_zhp = zfs_open(zlibh, nlp->nlp_vol,
+            ZFS_TYPE_VOLUME | ZFS_TYPE_FILESYSTEM)) == NULL) {
+                syslog(LOG_ERR,
+                    "backup_dataset_destroy: open %s failed [while trying "
+                    "to destroy]", nlp->nlp_vol);
+                zfs_close(cln_zhp);
+                (void) mutex_unlock(&zlib_mtx);
+                return (-1);
         }
 
+        /*
+         * This unmounts the clone which was just traversed for backup
+         */
+        if ((err = zfs_unmount(cln_zhp, NULL, 0)) != 0) {
+                syslog(LOG_INFO, "failed to unmount [%s]", nlp->nlp_clonename);
+                rv = -1;
+                goto _out;
+        }
+
+        /*
+         * This destroys the clone
+         */
+        err = zfs_destroy(cln_zhp, B_TRUE);
         if (err) {
-                NDMP_LOG(LOG_ERR, "%s (recursive destroy: %d): %d; %s; %s",
-                    namep,
-                    recursive,
+                syslog(LOG_ERR, "%s destroy: %d; %s; %s",
+                    nlp->nlp_clonename,
                     libzfs_errno(zlibh),
                     libzfs_error_action(zlibh),
                     libzfs_error_description(zlibh));
+                rv = -1;
+                goto _out;
+        }
 
-                if (zfs_err)
-                        *zfs_err = err;
+        /*
+         * This destroys the snapshot of the current backup - but,
+         * don't destroy it if it is a "checkpoint" from AutoSync
+         * or HPR.
+         */
+        if (!NLP_ISCHKPNTED(nlp)) {
+                if ((err = zfs_destroy_snaps(vol_zhp,
+                    nlp->nlp_job_name, B_TRUE))) {
+                        syslog(LOG_ERR, "%s destroy: %d; %s; %s",
+                            nlp->nlp_job_name,
+                            libzfs_errno(zlibh),
+                            libzfs_error_action(zlibh),
+                            libzfs_error_description(zlibh));
+                        rv = -1;
+                        syslog(LOG_DEBUG, "Destroy [%s]", nlp->nlp_snapname);
+                        goto _out;
         }
+        } else {
+                syslog(LOG_DEBUG, "Checkpointed checkpoint will not destroy [%s]",
+                    nlp->nlp_snapname);
+        }
 
-        zfs_close(zhp);
+_out:
+        zfs_close(vol_zhp);
+        zfs_close(cln_zhp);
         (void) mutex_unlock(&zlib_mtx);
 
-        return (0);
+        /*
+         * The zfs_clone() call will have mounted the snapshot
+         * in the file system at this point - so clean it up.
+         */
+        if (rv == 0) {
+                if (rmdir(nlp->nlp_mountpoint) != 0) {
+                        syslog(LOG_ERR,
+                            "Failed to remove mount point [%s]",
+                            nlp->nlp_mountpoint);
+                        return (-1);
+                }
+        }
+
+        return (rv);
 }