Print this page
*** NO COMMENTS ***


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.

  26  */
  27 
  28 #include <assert.h>
  29 #include <ctype.h>
  30 #include <errno.h>
  31 #include <libintl.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <strings.h>
  35 #include <unistd.h>
  36 #include <stddef.h>
  37 #include <fcntl.h>
  38 #include <sys/mount.h>
  39 #include <pthread.h>
  40 #include <umem.h>
  41 #include <time.h>
  42 
  43 #include <libzfs.h>
  44 
  45 #include "zfs_namecheck.h"
  46 #include "zfs_prop.h"
  47 #include "zfs_fletcher.h"
  48 #include "libzfs_impl.h"
  49 #include <sha2.h>
  50 #include <sys/zio_checksum.h>
  51 #include <sys/ddt.h>
  52 
  53 /* in libzfs_dataset.c */
  54 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
  55 
  56 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
  57     int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
  58 
  59 static const zio_cksum_t zero_cksum = { 0 };
  60 
  /*
   * Argument block handed to the cksummer() thread, which is started when a
   * deduplicated send stream is requested.
   */
  61 typedef struct dedup_arg {
  62         int     inputfd;       /* cksummer() fdopen()s this fd for reading */
  63         int     outputfd;      /* cksummer() writes the rewritten stream here */


  64         libzfs_handle_t  *dedup_hdl;  /* libzfs handle of the sending dataset */
  65 } dedup_arg_t;
  66 
  /*
   * Argument block passed to send_progress_thread() when progress reporting
   * has been requested for a send.
   */
  67 typedef struct progress_arg {
  68         zfs_handle_t *pa_zhp;          /* handle of the snapshot being sent */
  69         int pa_fd;                     /* fd the send stream is written to */
  70         boolean_t pa_parsable;         /* copied from the sender's "parsable" flag */
  71 } progress_arg_t;
  72 
  /*
   * Location of a data block already emitted in the stream.  When a duplicate
   * block is found, these three values are copied into the refguid, refobject
   * and refoffset fields of the DRR_WRITE_BYREF record.
   */
  73 typedef struct dataref {
  74         uint64_t ref_guid;
  75         uint64_t ref_object;
  76         uint64_t ref_offset;
  77 } dataref_t;
  78 
  79 typedef struct dedup_entry {
  80         struct dedup_entry      *dde_next;
  81         zio_cksum_t dde_chksum;
  82         uint64_t dde_prop;
  83         dataref_t dde_ref;


 167         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
 168             ddepp = &((*ddepp)->dde_next)) {
 169                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
 170                     (*ddepp)->dde_prop == prop) {
 171                         *dr = (*ddepp)->dde_ref;
 172                         return (B_TRUE);
 173                 }
 174         }
 175         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
 176         return (B_FALSE);
 177 }
 178 
 179 static int
 180 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
 181 {
 182         fletcher_4_incremental_native(buf, len, zc);
 183         return (write(outfd, buf, len));
 184 }
 185 
 186 /*


















 187  * This function is started in a separate thread when the dedup option
 188  * has been requested.  The main send thread determines the list of
 189  * snapshots to be included in the send stream and makes the ioctl calls
 190  * for each one.  But instead of having the ioctl send the output to
 191  * the output fd (specified by the caller of zfs_send()), the
 192  * ioctl is told to direct the output to a pipe, which is read by the
 193  * alternate thread running THIS function.  This function does the
 194  * dedup'ing by:
 195  *  1. building a dedup table (the DDT)
 196  *  2. doing checksums on each data block and inserting a record in the DDT
 197  *  3. looking for matching checksums, and
 198  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
 199  *      a duplicate block is found.
 200  * The output of this function then goes to the output fd requested
 201  * by the caller of zfs_send().
 202  */
 203 static void *
 204 cksummer(void *arg)
 205 {
 206         dedup_arg_t *dda = arg;


 244         /* Initialize the write-by-reference block. */
 245         wbr_drr.drr_type = DRR_WRITE_BYREF;
 246         wbr_drr.drr_payloadlen = 0;
 247 
             /*
              * Records are read from dda->inputfd (wrapped in a stdio stream)
              * and the rewritten stream is emitted on dda->outputfd.
              * NOTE(review): the fdopen() result is not checked before it is
              * handed to ssread() and fclose() -- confirm it cannot be NULL.
              */
 248         outfd = dda->outputfd;
 249         ofp = fdopen(dda->inputfd, "r");
             /* One iteration per replay record; the loop ends when ssread() returns 0. */
 250         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
 251 
 252                 switch (drr->drr_type) {
 253                 case DRR_BEGIN:
 254                 {
 255                         int     fflags;
                             /* every BEGIN record restarts the running checksum */
 256                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 257 
 258                         /* set the DEDUP feature flag for this stream */
 259                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 260                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
 261                             DMU_BACKUP_FEATURE_DEDUPPROPS);
 262                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 263 
 264                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),

 265                             &stream_cksum, outfd) == -1)
 266                                 goto out;
                             /* a compound-stream header may be followed by a payload */
 267                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 268                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
 269                                 int sz = drr->drr_payloadlen;
 270 
                                     /*
                                      * NOTE(review): buf is only regrown for
                                      * payloads above 1<<20 bytes, so it is
                                      * presumably allocated at that size in
                                      * the setup not visible here -- confirm.
                                      * The malloc() result is used unchecked.
                                      */
 271                                 if (sz > 1<<20) {
 272                                         free(buf);
 273                                         buf = malloc(sz);
 274                                 }
 275                                 (void) ssread(buf, sz, ofp);
                                     /*
                                      * NOTE(review): this tests stdin, but the
                                      * read above was from ofp -- looks like it
                                      * should be ferror(ofp); confirm.
                                      */
 276                                 if (ferror(stdin))
 277                                         perror("fread");
 278                                 if (cksum_and_write(buf, sz, &stream_cksum,
 279                                     outfd) == -1)
 280                                         goto out;
 281                         }
 282                         break;
 283                 }
 284 
 285                 case DRR_END:
 286                 {
 287                         /* use the recalculated checksum */
 288                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
 289                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
 290                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
                             /*
                              * The END record carries the checksum itself, so
                              * it is emitted with plain write() and is not
                              * folded into stream_cksum.
                              */
 291                         if ((write(outfd, drr,
 292                             sizeof (dmu_replay_record_t))) == -1)
 293                                 goto out;

 294                         break;
 295                 }
 296 
 297                 case DRR_OBJECT:
 298                 {
 299                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),

 300                             &stream_cksum, outfd) == -1)
 301                                 goto out;
                             /* bonus data follows the record, rounded up to 8 bytes */
 302                         if (drro->drr_bonuslen > 0) {
 303                                 (void) ssread(buf,
 304                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 305                                     ofp);
 306                                 if (cksum_and_write(buf,
 307                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 308                                     &stream_cksum, outfd) == -1)
 309                                         goto out;
 310                         }
 311                         break;
 312                 }
 313 
 314                 case DRR_SPILL:
 315                 {
 316                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),

 317                             &stream_cksum, outfd) == -1)
 318                                 goto out;
                             /* pass the drr_length bytes of spill payload through unchanged */
 319                         (void) ssread(buf, drrs->drr_length, ofp);
 320                         if (cksum_and_write(buf, drrs->drr_length,
 321                             &stream_cksum, outfd) == -1)
 322                                 goto out;
 323                         break;
 324                 }
 325 
 326                 case DRR_FREEOBJECTS:
 327                 {
 328                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),

 329                             &stream_cksum, outfd) == -1)
 330                                 goto out;
 331                         break;
 332                 }
 333 
 334                 case DRR_WRITE:
 335                 {
 336                         dataref_t       dataref;
 337 
                             /*
                              * NOTE(review): the read length comes straight
                              * from the record and the ssread() result is
                              * discarded -- confirm buf is always large enough
                              * and that a short read cannot go unnoticed.
                              */
 338                         (void) ssread(buf, drrw->drr_length, ofp);
 339 
 340                         /*
 341                          * Use the existing checksum if it's dedup-capable,
 342                          * else calculate a SHA256 checksum for it.
 343                          */
 344 
 345                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
 346                             zero_cksum) ||
 347                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
 348                                 SHA256_CTX      ctx;


 373                                 /* block already present in stream */
 374                                 wbr_drrr->drr_object = drrw->drr_object;
 375                                 wbr_drrr->drr_offset = drrw->drr_offset;
 376                                 wbr_drrr->drr_length = drrw->drr_length;
 377                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
 378                                 wbr_drrr->drr_refguid = dataref.ref_guid;
 379                                 wbr_drrr->drr_refobject =
 380                                     dataref.ref_object;
 381                                 wbr_drrr->drr_refoffset =
 382                                     dataref.ref_offset;
 383 
 384                                 wbr_drrr->drr_checksumtype =
 385                                     drrw->drr_checksumtype;
                                     /*
                                      * NOTE(review): drr_checksumflags is
                                      * assigned from drr_checksumtype, not from
                                      * drr_checksumflags -- looks like a
                                      * copy/paste slip; confirm.
                                      */
 386                                 wbr_drrr->drr_checksumflags =
 387                                     drrw->drr_checksumtype;
 388                                 wbr_drrr->drr_key.ddk_cksum =
 389                                     drrw->drr_key.ddk_cksum;
 390                                 wbr_drrr->drr_key.ddk_prop =
 391                                     drrw->drr_key.ddk_prop;
 392 
                                     /* only the by-reference record is sent; the data is dropped */
 393                                 if (cksum_and_write(&wbr_drr,
 394                                     sizeof (dmu_replay_record_t), &stream_cksum,
 395                                     outfd) == -1)
 396                                         goto out;
 397                         } else {
 398                                 /* block not previously seen */
 399                                 if (cksum_and_write(drr,
 400                                     sizeof (dmu_replay_record_t), &stream_cksum,
 401                                     outfd) == -1)
 402                                         goto out;
 403                                 if (cksum_and_write(buf,
 404                                     drrw->drr_length,
 405                                     &stream_cksum, outfd) == -1)
 406                                         goto out;
 407                         }
 408                         break;
 409                 }
 410 
 411                 case DRR_FREE:
 412                 {
 413                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),

 414                             &stream_cksum, outfd) == -1)
 415                                 goto out;
 416                         break;
 417                 }
 418 
 419                 default:
 420                         (void) printf("INVALID record type 0x%x\n",
 421                             drr->drr_type);
 422                         /* should never happen, so assert */
 423                         assert(B_FALSE);
 424                 }
 425         }
             /*
              * Common exit for end-of-input and for any failed write: destroy
              * the DDT entry cache, free the hash array and the data buffer,
              * and close the input stream.  The thread always returns NULL.
              */
 426 out:
 427         umem_cache_destroy(ddt.ddecache);
 428         free(ddt.dedup_hash_array);
 429         free(buf);
 430         (void) fclose(ofp);
 431 
 432         return (NULL);
 433 }


 772         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
 773                 nvlist_free(sd.fss);
 774                 *nvlp = NULL;
 775                 return (EZFS_NOMEM);
 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 785 typedef struct send_dump_data {
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
             /*
              * prevsnap / prevsnap_obj are updated after each snapshot is
              * processed and are passed as the incremental source for the
              * next one.
              */
 789         char prevsnap[ZFS_MAXNAMELEN];
 790         uint64_t prevsnap_obj;
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
             /* copied from the caller's send flags before the dump starts */
 792         boolean_t verbose, dryrun, parsable, progress;



             /* stream destination: the dedup pipe when dedup is on, else the caller's fd */
 793         int outfd;
             /* folded into the final return value of the send */
 794         boolean_t err;
 795         nvlist_t *fss;
 796         avl_tree_t *fsavl;
             /* caller-supplied snapshot filter and its opaque argument */
 797         snapfilter_cb_t *filter_cb;
 798         void *filter_cb_arg;
             /* optional nvlist that receives per-snapshot debug information */
 799         nvlist_t *debugnv;
             /* hold tag, formatted as ".send-<pid>-<sequence>" when holds are used */
 800         char holdtag[ZFS_MAXNAMELEN];
             /* fd opened on ZFS_DEV while holds are in use, otherwise -1 */
 801         int cleanup_fd;
             /* running sum of the per-snapshot size estimates */
 802         uint64_t size;
 803 } send_dump_data_t;
 804 
 805 static int
 806 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 807     boolean_t fromorigin, uint64_t *sizep)
 808 {
             /*
              * Stores a size estimate for sending snapshot zhp in *sizep; the
              * caller prints it as the "estimated size".  Returns 0 on success
              * or a libzfs error code.  The request itself is issued in lines
              * not visible in this listing.
              */
 809         zfs_cmd_t zc = { 0 };
 810         libzfs_handle_t *hdl = zhp->zfs_hdl;
 811 
             /* only snapshots can be sent */
 812         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);


 851                 case EROFS:
                             /* report as a bad backup, with the errno text as detail */
 852                         zfs_error_aux(hdl, strerror(errno));
 853                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 854 
 855                 default:
 856                         return (zfs_standard_error(hdl, errno, errbuf));
 857                 }
 858         }
 859 
             /* the estimate comes back in the zc_objset_type field */
 860         *sizep = zc.zc_objset_type;
 861 
 862         return (0);
 863 }
 864 
 865 /*
 866  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
 867  * NULL) to the file descriptor specified by outfd.
 868  */
 869 static int
 870 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
 871     boolean_t fromorigin, int outfd, nvlist_t *debugnv)

 872 {
 873         zfs_cmd_t zc = { 0 };
 874         libzfs_handle_t *hdl = zhp->zfs_hdl;
 875         nvlist_t *thisdbg;
 876 
             /* only snapshots can be sent; an explicit source excludes fromorigin */
 877         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 878         assert(fromsnap_obj == 0 || !fromorigin);
 879 
             /* describe the request: target name, output fd, source and target objects */
 880         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 881         zc.zc_cookie = outfd;
 882         zc.zc_obj = fromorigin;
 883         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 884         zc.zc_fromobj = fromsnap_obj;


 885 
             /* per-snapshot debug record; attached to debugnv (if given) under the snapshot name */
 886         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
 887         if (fromsnap && fromsnap[0] != '\0') {
 888                 VERIFY(0 == nvlist_add_string(thisdbg,
 889                     "fromsnap", fromsnap));
 890         }
 891 
 892         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
 893                 char errbuf[1024];
 894                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 895                     "warning: cannot send '%s'"), zhp->zfs_name);
 896 
                     /* record the failure in the debug nvlist before reporting it */
 897                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
 898                 if (debugnv) {
 899                         VERIFY(0 == nvlist_add_nvlist(debugnv,
 900                             zhp->zfs_name, thisdbg));
 901                 }
 902                 nvlist_free(thisdbg);
 903 
                     /*
                      * NOTE(review): errno is read again here after the
                      * snprintf()/nvlist calls above -- confirm none of them
                      * can change it, or save it right after zfs_ioctl().
                      */
 904                 switch (errno) {


 918 
 919                 case EDQUOT:
 920                 case EFBIG:
 921                 case EIO:
 922                 case ENOLINK:
 923                 case ENOSPC:
 924                 case ENOSTR:
 925                 case ENXIO:
 926                 case EPIPE:
 927                 case ERANGE:
 928                 case EFAULT:
 929                 case EROFS:
                             /* all of these are reported as a bad backup, with the errno text as detail */
 930                         zfs_error_aux(hdl, strerror(errno));
 931                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 932 
 933                 default:
 934                         return (zfs_standard_error(hdl, errno, errbuf));
 935                 }
 936         }
 937 

             /* success: publish the debug record (if requested) and release the local copy */
 938         if (debugnv)
 939                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940         nvlist_free(thisdbg);
 941 
 942         return (0);
 943 }
 944 
 945 static int
 946 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947 {
 948         zfs_handle_t *pzhp;
 949         int error = 0;
 950         char *thissnap;
 951 
 952         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953 
 954         if (sdd->dryrun)
 955                 return (0);
 956 
 957         /*


1100                  * exist, and the next accepted snapshot will be sent as
1101                  * an incremental from the last accepted one, or as the
1102                  * first (and full) snapshot in the case of a replication,
1103                  * non-incremental send.
1104                  */
1105                 zfs_close(zhp);
1106                 return (0);
1107         }
1108 
1109         err = hold_for_send(zhp, sdd);
1110         if (err) {
1111                 if (err == ENOENT)
1112                         err = 0;
1113                 zfs_close(zhp);
1114                 return (err);
1115         }
1116 
1117         fromorigin = sdd->prevsnap[0] == '\0' &&
1118             (sdd->fromorigin || sdd->replicate);
1119 

1120         if (sdd->verbose) {
1121                 uint64_t size;
1122                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1123                     fromorigin, &size);
1124 
1125                 if (sdd->parsable) {
1126                         if (sdd->prevsnap[0] != '\0') {
1127                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1128                                     sdd->prevsnap, zhp->zfs_name);
1129                         } else {
1130                                 (void) fprintf(stderr, "full\t%s",
1131                                     zhp->zfs_name);
1132                         }
1133                 } else {
1134                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1135                             "send from @%s to %s"),
1136                             sdd->prevsnap, zhp->zfs_name);
1137                 }















1138                 if (err == 0) {
1139                         if (sdd->parsable) {
1140                                 (void) fprintf(stderr, "\t%llu\n",
1141                                     (longlong_t)size);
1142                         } else {
1143                                 char buf[16];
1144                                 zfs_nicenum(size, buf, sizeof (buf));
1145                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1146                                     " estimated size is %s\n"), buf);


1147                         }
1148                         sdd->size += size;
1149                 } else {

1150                         (void) fprintf(stderr, "\n");
1151                 }
1152         }

1153 
1154         if (!sdd->dryrun) {



1155                 /*
1156                  * If progress reporting is requested, spawn a new thread to
1157                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1158                  */
1159                 if (sdd->progress) {
1160                         pa.pa_zhp = zhp;
1161                         pa.pa_fd = sdd->outfd;
1162                         pa.pa_parsable = sdd->parsable;
1163 
1164                         if (err = pthread_create(&tid, NULL,
1165                             send_progress_thread, &pa)) {
1166                                 zfs_close(zhp);
1167                                 return (err);
1168                         }
1169                 }
1170 













1171                 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1172                     fromorigin, sdd->outfd, sdd->debugnv);

1173 
1174                 if (sdd->progress) {


1175                         (void) pthread_cancel(tid);
1176                         (void) pthread_join(tid, NULL);
1177                 }
1178         }
1179 
1180         (void) strcpy(sdd->prevsnap, thissnap);
1181         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1182         zfs_close(zhp);
1183         return (err);
1184 }
1185 
1186 static int
1187 dump_filesystem(zfs_handle_t *zhp, void *arg)
1188 {
1189         int rv = 0;
1190         send_dump_data_t *sdd = arg;
1191         boolean_t missingfrom = B_FALSE;
1192         zfs_cmd_t zc = { 0 };
1193 
1194         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",


1397 
1398         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1399                 uint64_t version;
1400                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1401                 if (version >= ZPL_VERSION_SA) {
1402                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1403                 }
1404         }
1405 
1406         if (flags->dedup && !flags->dryrun) {
1407                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1408                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1409                 if (err = pipe(pipefd)) {
1410                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1411                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1412                             errbuf));
1413                 }
1414                 dda.outputfd = outfd;
1415                 dda.inputfd = pipefd[1];
1416                 dda.dedup_hdl = zhp->zfs_hdl;

1417                 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1418                         (void) close(pipefd[0]);
1419                         (void) close(pipefd[1]);
1420                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1421                         return (zfs_error(zhp->zfs_hdl,
1422                             EZFS_THREADCREATEFAILED, errbuf));
1423                 }
1424         }
1425 
1426         if (flags->replicate || flags->doall || flags->props) {
1427                 dmu_replay_record_t drr = { 0 };
1428                 char *packbuf = NULL;
1429                 size_t buflen = 0;
1430                 zio_cksum_t zc = { 0 };
1431 
1432                 if (flags->replicate || flags->props) {
1433                         nvlist_t *hdrnv;
1434 
1435                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1436                         if (fromsnap) {


1457                         if (err) {
1458                                 fsavl_destroy(fsavl);
1459                                 nvlist_free(fss);
1460                                 goto stderr_out;
1461                         }
1462                 }
1463 
1464                 if (!flags->dryrun) {
1465                         /* write first begin record */
1466                         drr.drr_type = DRR_BEGIN;
1467                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1468                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1469                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1470                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1471                             drr_versioninfo, featureflags);
1472                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1473                             sizeof (drr.drr_u.drr_begin.drr_toname),
1474                             "%s@%s", zhp->zfs_name, tosnap);
1475                         drr.drr_payloadlen = buflen;
1476                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);

1477 
1478                         /* write header nvlist */
1479                         if (err != -1 && packbuf != NULL) {
1480                                 err = cksum_and_write(packbuf, buflen, &zc,
1481                                     outfd);

1482                         }
1483                         free(packbuf);
1484                         if (err == -1) {
1485                                 fsavl_destroy(fsavl);
1486                                 nvlist_free(fss);
1487                                 err = errno;
1488                                 goto stderr_out;
1489                         }
1490 
1491                         /* write end record */
1492                         bzero(&drr, sizeof (drr));
1493                         drr.drr_type = DRR_END;
1494                         drr.drr_u.drr_end.drr_checksum = zc;
1495                         err = write(outfd, &drr, sizeof (drr));

1496                         if (err == -1) {
1497                                 fsavl_destroy(fsavl);
1498                                 nvlist_free(fss);
1499                                 err = errno;
1500                                 goto stderr_out;
1501                         }
1502 
1503                         err = 0;
1504                 }
1505         }
1506 
1507         /* dump each stream */
1508         sdd.fromsnap = fromsnap;
1509         sdd.tosnap = tosnap;
1510         if (flags->dedup)
1511                 sdd.outfd = pipefd[0];
1512         else
1513                 sdd.outfd = outfd;
1514         sdd.replicate = flags->replicate;
1515         sdd.doall = flags->doall;
1516         sdd.fromorigin = flags->fromorigin;
1517         sdd.fss = fss;
1518         sdd.fsavl = fsavl;
1519         sdd.verbose = flags->verbose;


1520         sdd.parsable = flags->parsable;
1521         sdd.progress = flags->progress;
1522         sdd.dryrun = flags->dryrun;
1523         sdd.filter_cb = filter_func;
1524         sdd.filter_cb_arg = cb_arg;
1525         if (debugnvp)
1526                 sdd.debugnv = *debugnvp;
1527 
1528         /*
1529          * Some flags require that we place user holds on the datasets that are
1530          * being sent so they don't get destroyed during the send. We can skip
1531          * this step if the pool is imported read-only since the datasets cannot
1532          * be destroyed.
1533          */
1534         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1535             ZPOOL_PROP_READONLY, NULL) &&
1536             zfs_spa_version(zhp, &spa_version) == 0 &&
1537             spa_version >= SPA_VERSION_USERREFS &&
1538             (flags->doall || flags->replicate)) {
1539                 ++holdseq;
1540                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1541                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1542                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1543                 if (sdd.cleanup_fd < 0) {
1544                         err = errno;
1545                         goto stderr_out;
1546                 }
1547         } else {
1548                 sdd.cleanup_fd = -1;
1549         }
1550         if (flags->verbose) {
1551                 /*
1552                  * Do a verbose no-op dry run to get all the verbose output
1553                  * before generating any data.  Then do a non-verbose real
1554                  * run to generate the streams.
1555                  */
1556                 sdd.dryrun = B_TRUE;
1557                 err = dump_filesystems(zhp, &sdd);
1558                 sdd.dryrun = flags->dryrun;
1559                 sdd.verbose = B_FALSE;
1560                 if (flags->parsable) {
1561                         (void) fprintf(stderr, "size\t%llu\n",
1562                             (longlong_t)sdd.size);
1563                 } else {
1564                         char buf[16];
1565                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1566                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1567                             "total estimated size is %s\n"), buf);
1568                 }
1569         }
1570         err = dump_filesystems(zhp, &sdd);
1571         fsavl_destroy(fsavl);
1572         nvlist_free(fss);
1573 
1574         if (flags->dedup) {
1575                 (void) close(pipefd[0]);
1576                 (void) pthread_join(tid, NULL);

1577         }
1578 
1579         if (sdd.cleanup_fd != -1) {
1580                 VERIFY(0 == close(sdd.cleanup_fd));
1581                 sdd.cleanup_fd = -1;
1582         }
1583 
1584         if (!flags->dryrun && (flags->replicate || flags->doall ||
1585             flags->props)) {
1586                 /*
1587                  * write final end record.  NB: want to do this even if
1588                  * there was some error, because it might not be totally
1589                  * failed.
1590                  */
1591                 dmu_replay_record_t drr = { 0 };
1592                 drr.drr_type = DRR_END;
1593                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1594                         return (zfs_standard_error(zhp->zfs_hdl,
1595                             errno, errbuf));
1596                 }

1597         }














1598 
1599         return (err || sdd.err);
1600 
1601 stderr_out:
1602         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1603 err_out:
1604         if (sdd.cleanup_fd != -1)
1605                 VERIFY(0 == close(sdd.cleanup_fd));
1606         if (flags->dedup) {
1607                 (void) pthread_cancel(tid);
1608                 (void) pthread_join(tid, NULL);
1609                 (void) close(pipefd[0]);
1610         }
1611         return (err);
1612 }
1613 
1614 /*
1615  * Routines specific to "zfs recv"
1616  */
1617 




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 #include <assert.h>
  30 #include <ctype.h>
  31 #include <errno.h>
  32 #include <libintl.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <strings.h>
  36 #include <unistd.h>
  37 #include <stddef.h>
  38 #include <fcntl.h>
  39 #include <sys/mount.h>
  40 #include <pthread.h>
  41 #include <umem.h>
  42 #include <time.h>
  43 
  44 #include <libzfs.h>
  45 
  46 #include "zfs_namecheck.h"
  47 #include "zfs_prop.h"
  48 #include "zfs_fletcher.h"
  49 #include "libzfs_impl.h"
  50 #include <sha2.h>
  51 #include <sys/zio_checksum.h>
  52 #include <sys/ddt.h>
  53 
  54 /* in libzfs_dataset.c */
  55 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
  56 
  57 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
  58     int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
  59 
  60 static const zio_cksum_t zero_cksum = { 0 };
  61 
  62 typedef struct dedup_arg {
  63         int     inputfd;
  64         int     outputfd;
  65         uint64_t        dedup_data_sz;
  66         boolean_t       sendsize;
  67         libzfs_handle_t  *dedup_hdl;
  68 } dedup_arg_t;
  69 
  70 typedef struct progress_arg {
  71         zfs_handle_t *pa_zhp;
  72         int pa_fd;
  73         boolean_t pa_parsable;
  74 } progress_arg_t;
  75 
  76 typedef struct dataref {
  77         uint64_t ref_guid;
  78         uint64_t ref_object;
  79         uint64_t ref_offset;
  80 } dataref_t;
  81 
  82 typedef struct dedup_entry {
  83         struct dedup_entry      *dde_next;
  84         zio_cksum_t dde_chksum;
  85         uint64_t dde_prop;
  86         dataref_t dde_ref;


 170         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
 171             ddepp = &((*ddepp)->dde_next)) {
 172                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
 173                     (*ddepp)->dde_prop == prop) {
 174                         *dr = (*ddepp)->dde_ref;
 175                         return (B_TRUE);
 176                 }
 177         }
 178         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
 179         return (B_FALSE);
 180 }
 181 
 182 static int
 183 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
 184 {
 185         fletcher_4_incremental_native(buf, len, zc);
 186         return (write(outfd, buf, len));
 187 }
 188 
 189 /*
 190  * the function used by the cksummer thread that needs to know
 191  * about the sendsize flag
 192  */
 193 static int
 194 dedup_cksum_and_write(dedup_arg_t *dda, const void *buf, uint64_t len,
 195     zio_cksum_t *zc, int outfd)
 196 {
 197         int ret = len;
 198 
 199         dda->dedup_data_sz += len;
 200         fletcher_4_incremental_native(buf, len, zc);
 201         if (!dda->sendsize)
 202                 ret = (write(outfd, buf, len));
 203 
 204         return (ret);
 205 }
 206 
 207 /*
 208  * This function is started in a separate thread when the dedup option
 209  * has been requested.  The main send thread determines the list of
 210  * snapshots to be included in the send stream and makes the ioctl calls
 211  * for each one.  But instead of having the ioctl send the output to
 212  * the output fd specified by the caller of zfs_send(), the
 213  * ioctl is told to direct the output to a pipe, which is read by the
 214  * alternate thread running THIS function.  This function does the
 215  * dedup'ing by:
 216  *  1. building a dedup table (the DDT)
 217  *  2. doing checksums on each data block and inserting a record in the DDT
 218  *  3. looking for matching checksums, and
 219  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
 220  *      a duplicate block is found.
 221  * The output of this function then goes to the output fd requested
 222  * by the caller of zfs_send().
 223  */
 224 static void *
 225 cksummer(void *arg)
 226 {
 227         dedup_arg_t *dda = arg;


 265         /* Initialize the write-by-reference block. */
 266         wbr_drr.drr_type = DRR_WRITE_BYREF;
 267         wbr_drr.drr_payloadlen = 0;
 268 
 269         outfd = dda->outputfd;
 270         ofp = fdopen(dda->inputfd, "r");
 271         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
 272 
 273                 switch (drr->drr_type) {
 274                 case DRR_BEGIN:
 275                 {
 276                         int     fflags;
 277                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 278 
 279                         /* set the DEDUP feature flag for this stream */
 280                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 281                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
 282                             DMU_BACKUP_FEATURE_DEDUPPROPS);
 283                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 284 
 285                         if (dedup_cksum_and_write(dda, drr,
 286                             sizeof (dmu_replay_record_t),
 287                             &stream_cksum, outfd) == -1)
 288                                 goto out;
 289                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 290                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
 291                                 int sz = drr->drr_payloadlen;
 292 
 293                                 if (sz > 1<<20) {
 294                                         free(buf);
 295                                         buf = malloc(sz);
 296                                 }
 297                                 (void) ssread(buf, sz, ofp);
 298                                 if (ferror(stdin))
 299                                         perror("fread");
 300                                 if (dedup_cksum_and_write(dda, buf, sz,
 301                                     &stream_cksum, outfd) == -1)
 302                                         goto out;
 303                         }
 304                         break;
 305                 }
 306 
 307                 case DRR_END:
 308                 {
 309                         /* use the recalculated checksum */
 310                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
 311                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
 312                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
 313                         if ((write(outfd, drr,
 314                             sizeof (dmu_replay_record_t))) == -1)
 315                                 goto out;
 316                         dda->dedup_data_sz += sizeof (dmu_replay_record_t);
 317                         break;
 318                 }
 319 
 320                 case DRR_OBJECT:
 321                 {
 322                         if (dedup_cksum_and_write(dda, drr,
 323                             sizeof (dmu_replay_record_t),
 324                             &stream_cksum, outfd) == -1)
 325                                 goto out;
 326                         if (drro->drr_bonuslen > 0) {
 327                                 (void) ssread(buf,
 328                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 329                                     ofp);
 330                                 if (dedup_cksum_and_write(dda, buf,
 331                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 332                                     &stream_cksum, outfd) == -1)
 333                                         goto out;
 334                         }
 335                         break;
 336                 }
 337 
 338                 case DRR_SPILL:
 339                 {
 340                         if (dedup_cksum_and_write(dda, drr,
 341                             sizeof (dmu_replay_record_t),
 342                             &stream_cksum, outfd) == -1)
 343                                 goto out;
 344                         (void) ssread(buf, drrs->drr_length, ofp);
 345                         if (dedup_cksum_and_write(dda, buf, drrs->drr_length,
 346                             &stream_cksum, outfd) == -1)
 347                                 goto out;
 348                         break;
 349                 }
 350 
 351                 case DRR_FREEOBJECTS:
 352                 {
 353                         if (dedup_cksum_and_write(dda, drr,
 354                             sizeof (dmu_replay_record_t),
 355                             &stream_cksum, outfd) == -1)
 356                                 goto out;
 357                         break;
 358                 }
 359 
 360                 case DRR_WRITE:
 361                 {
 362                         dataref_t       dataref;
 363 
 364                         (void) ssread(buf, drrw->drr_length, ofp);
 365 
 366                         /*
 367                          * Use the existing checksum if it's dedup-capable,
 368                          * else calculate a SHA256 checksum for it.
 369                          */
 370 
 371                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
 372                             zero_cksum) ||
 373                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
 374                                 SHA256_CTX      ctx;


 399                                 /* block already present in stream */
 400                                 wbr_drrr->drr_object = drrw->drr_object;
 401                                 wbr_drrr->drr_offset = drrw->drr_offset;
 402                                 wbr_drrr->drr_length = drrw->drr_length;
 403                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
 404                                 wbr_drrr->drr_refguid = dataref.ref_guid;
 405                                 wbr_drrr->drr_refobject =
 406                                     dataref.ref_object;
 407                                 wbr_drrr->drr_refoffset =
 408                                     dataref.ref_offset;
 409 
 410                                 wbr_drrr->drr_checksumtype =
 411                                     drrw->drr_checksumtype;
 412                                 wbr_drrr->drr_checksumflags =
 413                                     drrw->drr_checksumtype;
 414                                 wbr_drrr->drr_key.ddk_cksum =
 415                                     drrw->drr_key.ddk_cksum;
 416                                 wbr_drrr->drr_key.ddk_prop =
 417                                     drrw->drr_key.ddk_prop;
 418 
 419                                 if (dedup_cksum_and_write(dda, &wbr_drr,
 420                                     sizeof (dmu_replay_record_t), &stream_cksum,
 421                                     outfd) == -1)
 422                                         goto out;
 423                         } else {
 424                                 /* block not previously seen */
 425                                 if (dedup_cksum_and_write(dda, drr,
 426                                     sizeof (dmu_replay_record_t), &stream_cksum,
 427                                     outfd) == -1)
 428                                         goto out;
 429                                 if (dedup_cksum_and_write(dda, buf,
 430                                     drrw->drr_length,
 431                                     &stream_cksum, outfd) == -1)
 432                                         goto out;
 433                         }
 434                         break;
 435                 }
 436 
 437                 case DRR_FREE:
 438                 {
 439                         if (dedup_cksum_and_write(dda, drr,
 440                             sizeof (dmu_replay_record_t),
 441                             &stream_cksum, outfd) == -1)
 442                                 goto out;
 443                         break;
 444                 }
 445 
 446                 default:
 447                         (void) printf("INVALID record type 0x%x\n",
 448                             drr->drr_type);
 449                         /* should never happen, so assert */
 450                         assert(B_FALSE);
 451                 }
 452         }
 453 out:
 454         umem_cache_destroy(ddt.ddecache);
 455         free(ddt.dedup_hash_array);
 456         free(buf);
 457         (void) fclose(ofp);
 458 
 459         return (NULL);
 460 }


 799         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
 800                 nvlist_free(sd.fss);
 801                 *nvlp = NULL;
 802                 return (EZFS_NOMEM);
 803         }
 804 
 805         *nvlp = sd.fss;
 806         return (0);
 807 }
 808 
 809 /*
 810  * Routines specific to "zfs send"
 811  */
 812 typedef struct send_dump_data {
 813         /* these are all just the short snapname (the part after the @) */
 814         const char *fromsnap;
 815         const char *tosnap;
 816         char prevsnap[ZFS_MAXNAMELEN];
 817         uint64_t prevsnap_obj;
 818         boolean_t seenfrom, seento, replicate, doall, fromorigin;
 819         boolean_t verbose, dryrun, dedup, parsable, progress;
 820         boolean_t sendsize;
 821         uint32_t hdr_send_sz;
 822         uint64_t send_sz;
 823         int outfd;
 824         boolean_t err;
 825         nvlist_t *fss;
 826         avl_tree_t *fsavl;
 827         snapfilter_cb_t *filter_cb;
 828         void *filter_cb_arg;
 829         nvlist_t *debugnv;
 830         char holdtag[ZFS_MAXNAMELEN];
 831         int cleanup_fd;
 832         uint64_t size;
 833 } send_dump_data_t;
 834 
 835 static int
 836 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 837     boolean_t fromorigin, uint64_t *sizep)
 838 {
 839         zfs_cmd_t zc = { 0 };
 840         libzfs_handle_t *hdl = zhp->zfs_hdl;
 841 
 842         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);


 881                 case EROFS:
 882                         zfs_error_aux(hdl, strerror(errno));
 883                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 884 
 885                 default:
 886                         return (zfs_standard_error(hdl, errno, errbuf));
 887                 }
 888         }
 889 
 890         *sizep = zc.zc_objset_type;
 891 
 892         return (0);
 893 }
 894 
 895 /*
 896  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
 897  * NULL) to the file descriptor specified by outfd.
 898  */
 899 static int
 900 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
 901     boolean_t fromorigin, int outfd, nvlist_t *debugnv,
 902     boolean_t sendsize, uint64_t *sendcounter)
 903 {
 904         zfs_cmd_t zc = { 0 };
 905         libzfs_handle_t *hdl = zhp->zfs_hdl;
 906         nvlist_t *thisdbg;
 907 
 908         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 909         assert(fromsnap_obj == 0 || !fromorigin);
 910 
 911         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 912         zc.zc_cookie = outfd;
 913         zc.zc_obj = fromorigin;
 914         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 915         zc.zc_fromobj = fromsnap_obj;
 916         zc.zc_sendsize = sendsize;
 917         zc.zc_sendcounter = 0;
 918 
 919         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
 920         if (fromsnap && fromsnap[0] != '\0') {
 921                 VERIFY(0 == nvlist_add_string(thisdbg,
 922                     "fromsnap", fromsnap));
 923         }
 924 
 925         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
 926                 char errbuf[1024];
 927                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 928                     "warning: cannot send '%s'"), zhp->zfs_name);
 929 
 930                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
 931                 if (debugnv) {
 932                         VERIFY(0 == nvlist_add_nvlist(debugnv,
 933                             zhp->zfs_name, thisdbg));
 934                 }
 935                 nvlist_free(thisdbg);
 936 
 937                 switch (errno) {


 951 
 952                 case EDQUOT:
 953                 case EFBIG:
 954                 case EIO:
 955                 case ENOLINK:
 956                 case ENOSPC:
 957                 case ENOSTR:
 958                 case ENXIO:
 959                 case EPIPE:
 960                 case ERANGE:
 961                 case EFAULT:
 962                 case EROFS:
 963                         zfs_error_aux(hdl, strerror(errno));
 964                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 965 
 966                 default:
 967                         return (zfs_standard_error(hdl, errno, errbuf));
 968                 }
 969         }
 970 
 971         *sendcounter = (uint64_t)zc.zc_sendcounter;
 972         if (debugnv)
 973                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 974         nvlist_free(thisdbg);
 975 
 976         return (0);
 977 }
 978 
 979 static int
 980 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 981 {
 982         zfs_handle_t *pzhp;
 983         int error = 0;
 984         char *thissnap;
 985 
 986         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 987 
 988         if (sdd->dryrun)
 989                 return (0);
 990 
 991         /*


1134                  * exist, and the next accepted snapshot will be sent as
1135                  * an incremental from the last accepted one, or as the
1136                  * first (and full) snapshot in the case of a replication,
1137                  * non-incremental send.
1138                  */
1139                 zfs_close(zhp);
1140                 return (0);
1141         }
1142 
1143         err = hold_for_send(zhp, sdd);
1144         if (err) {
1145                 if (err == ENOENT)
1146                         err = 0;
1147                 zfs_close(zhp);
1148                 return (err);
1149         }
1150 
1151         fromorigin = sdd->prevsnap[0] == '\0' &&
1152             (sdd->fromorigin || sdd->replicate);
1153 
1154         /* print out to-from and approximate size in verbose mode */
1155         if (sdd->verbose) {
1156                 /* print preamble */



1157                 if (sdd->parsable) {
1158                         if (sdd->prevsnap[0] != '\0') {
1159                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1160                                     sdd->prevsnap, zhp->zfs_name);
1161                         } else {
1162                                 (void) fprintf(stderr, "full\t%s",
1163                                     zhp->zfs_name);
1164                         }
1165                 } else {
1166                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1167                             "send from @%s to %s"),
1168                             sdd->prevsnap, zhp->zfs_name);
1169                 }
1170 
1171                 if (sdd->sendsize) {
1172                         /*
1173                          * we are going to print out the exact stream size info,
1174                          * so skip the estimate
1175                          */
1176                         (void) fprintf(stderr, "\n");
1177                 } else {
1178                         /*
1179                          * provide stream size estimate otherwise
1180                          */
1181                         uint64_t size;
1182                         err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1183                             fromorigin, &size);
1184 
1185                         if (err == 0) {
1186                                 if (sdd->parsable) {
1187                                         (void) fprintf(stderr, "\t%llu\n",
1188                                             (longlong_t)size);
1189                                 } else {
1190                                         char buf[16];
1191                                         zfs_nicenum(size, buf, sizeof (buf));
1192                                         (void) fprintf(stderr,
1193                                             dgettext(TEXT_DOMAIN,
1194                                             " estimated size is %s\n"),
1195                                             buf);
1196                                 }
1197                                 sdd->size += size;
1198                         } else {
1199                                 /* could not estimate */
1200                                 (void) fprintf(stderr, "\n");
1201                         }
1202                 }
1203         }
1204 
1205         if (!sdd->dryrun) {
1206                 uint64_t sendcounter = 0;
1207                 boolean_t track_progress = (sdd->progress && !sdd->sendsize);
1208                 boolean_t sendsize = B_FALSE;
1209                 /*
1210                  * If progress reporting is requested, spawn a new thread to
1211                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1212                  */
1213                 if (track_progress) {
1214                         pa.pa_zhp = zhp;
1215                         pa.pa_fd = sdd->outfd;
1216                         pa.pa_parsable = sdd->parsable;
1217 
1218                         if (err = pthread_create(&tid, NULL,
1219                             send_progress_thread, &pa)) {
1220                                 zfs_close(zhp);
1221                                 return (err);
1222                         }
1223                 }
1224 
1225 
1226                 /*
1227                  * We need to reset the sendsize flag being sent to
1228                  * kernel if sdd->dedup is set. With dedup, the file
1229                  * descriptor sent to kernel is one end of the pipe,
1230                  * and we would want the data back in the pipe for
1231                  * cksummer() to calculate the exact size of the dedup-ed
1232                  * stream. So reset the sendsize flag such that
1233                  * kernel writes to the pipe.
1234                  */
1235 
1236                 sendsize = sdd->dedup ? B_FALSE : sdd->sendsize;
1237 
1238                 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1239                     fromorigin, sdd->outfd, sdd->debugnv,
1240                     sendsize, &sendcounter);
1241 
1242                 sdd->send_sz += sendcounter;
1243 
1244                 if (track_progress) {
1245                         (void) pthread_cancel(tid);
1246                         (void) pthread_join(tid, NULL);
1247                 }
1248         }
1249 
1250         (void) strcpy(sdd->prevsnap, thissnap);
1251         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1252         zfs_close(zhp);
1253         return (err);
1254 }
1255 
1256 static int
1257 dump_filesystem(zfs_handle_t *zhp, void *arg)
1258 {
1259         int rv = 0;
1260         send_dump_data_t *sdd = arg;
1261         boolean_t missingfrom = B_FALSE;
1262         zfs_cmd_t zc = { 0 };
1263 
1264         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",


1467 
1468         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1469                 uint64_t version;
1470                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1471                 if (version >= ZPL_VERSION_SA) {
1472                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1473                 }
1474         }
1475 
1476         if (flags->dedup && !flags->dryrun) {
1477                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1478                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1479                 if (err = pipe(pipefd)) {
1480                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1481                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1482                             errbuf));
1483                 }
1484                 dda.outputfd = outfd;
1485                 dda.inputfd = pipefd[1];
1486                 dda.dedup_hdl = zhp->zfs_hdl;
1487                 dda.sendsize = flags->sendsize;
1488                 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1489                         (void) close(pipefd[0]);
1490                         (void) close(pipefd[1]);
1491                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1492                         return (zfs_error(zhp->zfs_hdl,
1493                             EZFS_THREADCREATEFAILED, errbuf));
1494                 }
1495         }
1496 
1497         if (flags->replicate || flags->doall || flags->props) {
1498                 dmu_replay_record_t drr = { 0 };
1499                 char *packbuf = NULL;
1500                 size_t buflen = 0;
1501                 zio_cksum_t zc = { 0 };
1502 
1503                 if (flags->replicate || flags->props) {
1504                         nvlist_t *hdrnv;
1505 
1506                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1507                         if (fromsnap) {


1528                         if (err) {
1529                                 fsavl_destroy(fsavl);
1530                                 nvlist_free(fss);
1531                                 goto stderr_out;
1532                         }
1533                 }
1534 
1535                 if (!flags->dryrun) {
1536                         /* write first begin record */
1537                         drr.drr_type = DRR_BEGIN;
1538                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1539                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1540                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1541                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1542                             drr_versioninfo, featureflags);
1543                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1544                             sizeof (drr.drr_u.drr_begin.drr_toname),
1545                             "%s@%s", zhp->zfs_name, tosnap);
1546                         drr.drr_payloadlen = buflen;
1547                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1548                         sdd.hdr_send_sz += sizeof (drr);
1549 
1550                         /* write header nvlist */
1551                         if (err != -1 && packbuf != NULL) {
1552                                 err = cksum_and_write(packbuf, buflen, &zc,
1553                                     outfd);
1554                                 sdd.hdr_send_sz += buflen;
1555                         }
1556                         free(packbuf);
1557                         if (err == -1) {
1558                                 fsavl_destroy(fsavl);
1559                                 nvlist_free(fss);
1560                                 err = errno;
1561                                 goto stderr_out;
1562                         }
1563 
1564                         /* write end record */
1565                         bzero(&drr, sizeof (drr));
1566                         drr.drr_type = DRR_END;
1567                         drr.drr_u.drr_end.drr_checksum = zc;
1568                         err = write(outfd, &drr, sizeof (drr));
1569                         sdd.hdr_send_sz += sizeof (drr);
1570                         if (err == -1) {
1571                                 fsavl_destroy(fsavl);
1572                                 nvlist_free(fss);
1573                                 err = errno;
1574                                 goto stderr_out;
1575                         }
1576 
1577                         err = 0;
1578                 }
1579         }
1580 
1581         /* dump each stream */
1582         sdd.fromsnap = fromsnap;
1583         sdd.tosnap = tosnap;
1584         if (flags->dedup)
1585                 sdd.outfd = pipefd[0];
1586         else
1587                 sdd.outfd = outfd;
1588         sdd.replicate = flags->replicate;
1589         sdd.doall = flags->doall;
1590         sdd.fromorigin = flags->fromorigin;
1591         sdd.fss = fss;
1592         sdd.fsavl = fsavl;
1593         sdd.verbose = flags->verbose;
1594         sdd.dedup = flags->dedup;
1595         sdd.sendsize = flags->sendsize;
1596         sdd.parsable = flags->parsable;
1597         sdd.progress = flags->progress;
1598         sdd.dryrun = flags->dryrun;
1599         sdd.filter_cb = filter_func;
1600         sdd.filter_cb_arg = cb_arg;
1601         if (debugnvp)
1602                 sdd.debugnv = *debugnvp;
1603 
1604         /*
1605          * Some flags require that we place user holds on the datasets that are
1606          * being sent so they don't get destroyed during the send. We can skip
1607          * this step if the pool is imported read-only since the datasets cannot
1608          * be destroyed.
1609          */
1610         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1611             ZPOOL_PROP_READONLY, NULL) &&
1612             zfs_spa_version(zhp, &spa_version) == 0 &&
1613             spa_version >= SPA_VERSION_USERREFS &&
1614             (flags->doall || flags->replicate)) {
1615                 ++holdseq;
1616                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1617                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1618                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1619                 if (sdd.cleanup_fd < 0) {
1620                         err = errno;
1621                         goto stderr_out;
1622                 }
1623         } else {
1624                 sdd.cleanup_fd = -1;
1625         }
1626         if (flags->verbose && !flags->sendsize) {
1627                 /*
1628                  * Do a verbose no-op dry run to get all the verbose output
1629                  * before generating any data.  Then do a non-verbose real
1630                  * run to generate the streams.
1631                  */
1632                 sdd.dryrun = B_TRUE;
1633                 err = dump_filesystems(zhp, &sdd);
1634                 sdd.dryrun = flags->dryrun;
1635                 sdd.verbose = B_FALSE;
1636                 if (flags->parsable) {
1637                         (void) fprintf(stderr, "size\t%llu\n",
1638                             (longlong_t)sdd.size);
1639                 } else {
1640                         char buf[16];
1641                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1642                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1643                             "total estimated size is %s\n"), buf);
1644                 }
1645         }
1646         err = dump_filesystems(zhp, &sdd);
1647         fsavl_destroy(fsavl);
1648         nvlist_free(fss);
1649 
1650         if (flags->dedup) {
1651                 (void) close(pipefd[0]);
1652                 (void) pthread_join(tid, NULL);
1653                 sdd.send_sz = dda.dedup_data_sz;
1654         }
1655 
1656         if (sdd.cleanup_fd != -1) {
1657                 VERIFY(0 == close(sdd.cleanup_fd));
1658                 sdd.cleanup_fd = -1;
1659         }
1660 
1661         if (!flags->dryrun && (flags->replicate || flags->doall ||
1662             flags->props)) {
1663                 /*
1664                  * write final end record.  NB: want to do this even if
1665                  * there was some error, because it might not be totally
1666                  * failed.
1667                  */
1668                 dmu_replay_record_t drr = { 0 };
1669                 drr.drr_type = DRR_END;
1670                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1671                         return (zfs_standard_error(zhp->zfs_hdl,
1672                             errno, errbuf));
1673                 }
1674                 sdd.hdr_send_sz += sizeof (drr);
1675         }
1676 
1677         if (flags->sendsize) {
1678                 if (flags->verbose) {
1679                         fprintf(stderr, "Send stream header size (bytes): "
1680                             "%u\n", sdd.hdr_send_sz);
1681                         fprintf(stderr, "Send stream data size (bytes):  "
1682                             "%llu\n", sdd.send_sz);
1683                         fprintf(stderr, "Total send stream size (bytes):  "
1684                             "%llu\n", sdd.send_sz + (uint64_t)sdd.hdr_send_sz);
1685                 } else {
1686                         fprintf(stderr, "Total send stream size (bytes):  "
1687                             "%llu\n", sdd.send_sz + (uint64_t)sdd.hdr_send_sz);
1688                 }
1689         }
1690 
1691         return (err || sdd.err);
1692 
1693 stderr_out:
1694         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1695 err_out:
1696         if (sdd.cleanup_fd != -1)
1697                 VERIFY(0 == close(sdd.cleanup_fd));
1698         if (flags->dedup) {
1699                 (void) pthread_cancel(tid);
1700                 (void) pthread_join(tid, NULL);
1701                 (void) close(pipefd[0]);
1702         }
1703         return (err);
1704 }
1705 
1706 /*
1707  * Routines specific to "zfs recv"
1708  */
1709