6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 */
27
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <libintl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <unistd.h>
36 #include <stddef.h>
37 #include <fcntl.h>
38 #include <sys/mount.h>
39 #include <pthread.h>
40 #include <umem.h>
41 #include <time.h>
42
43 #include <libzfs.h>
44
45 #include "zfs_namecheck.h"
46 #include "zfs_prop.h"
47 #include "zfs_fletcher.h"
48 #include "libzfs_impl.h"
49 #include <sha2.h>
50 #include <sys/zio_checksum.h>
51 #include <sys/ddt.h>
52
53 /* in libzfs_dataset.c */
54 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
55
56 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
57 int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
58
59 static const zio_cksum_t zero_cksum = { 0 };
60
61 typedef struct dedup_arg {
62 int inputfd;
63 int outputfd;
64 libzfs_handle_t *dedup_hdl;
65 } dedup_arg_t;
66
67 typedef struct progress_arg {
68 zfs_handle_t *pa_zhp;
69 int pa_fd;
70 boolean_t pa_parsable;
71 } progress_arg_t;
72
/*
 * Reference to a data block that is already present in the stream.
 * cksummer() copies these values into the drr_refguid, drr_refobject and
 * drr_refoffset fields of a DRR_WRITE_BYREF record.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
78
79 typedef struct dedup_entry {
80 struct dedup_entry *dde_next;
81 zio_cksum_t dde_chksum;
82 uint64_t dde_prop;
83 dataref_t dde_ref;
167 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
168 ddepp = &((*ddepp)->dde_next)) {
169 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
170 (*ddepp)->dde_prop == prop) {
171 *dr = (*ddepp)->dde_ref;
172 return (B_TRUE);
173 }
174 }
175 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
176 return (B_FALSE);
177 }
178
179 static int
180 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
181 {
182 fletcher_4_incremental_native(buf, len, zc);
183 return (write(outfd, buf, len));
184 }
185
186 /*
187 * This function is started in a separate thread when the dedup option
188 * has been requested. The main send thread determines the list of
189 * snapshots to be included in the send stream and makes the ioctl calls
190 * for each one. But instead of having the ioctl send the output to
191 * the output fd specified by the caller of zfs_send(), the
192 * ioctl is told to direct the output to a pipe, which is read by the
193 * alternate thread running THIS function. This function does the
194 * dedup'ing by:
195 * 1. building a dedup table (the DDT)
196 * 2. doing checksums on each data block and inserting a record in the DDT
197 * 3. looking for matching checksums, and
198 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
199 * a duplicate block is found.
200 * The output of this function then goes to the output fd requested
201 * by the caller of zfs_send().
202 */
203 static void *
204 cksummer(void *arg)
205 {
206 dedup_arg_t *dda = arg;
244 /* Initialize the write-by-reference block. */
245 wbr_drr.drr_type = DRR_WRITE_BYREF;
246 wbr_drr.drr_payloadlen = 0;
247
248 outfd = dda->outputfd;
249 ofp = fdopen(dda->inputfd, "r");
250 while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
251
252 switch (drr->drr_type) {
253 case DRR_BEGIN:
254 {
255 int fflags;
256 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
257
258 /* set the DEDUP feature flag for this stream */
259 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
260 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
261 DMU_BACKUP_FEATURE_DEDUPPROPS);
262 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
263
264 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
265 &stream_cksum, outfd) == -1)
266 goto out;
267 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
268 DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
269 int sz = drr->drr_payloadlen;
270
271 if (sz > 1<<20) {
272 free(buf);
273 buf = malloc(sz);
274 }
275 (void) ssread(buf, sz, ofp);
276 if (ferror(stdin))
277 perror("fread");
278 if (cksum_and_write(buf, sz, &stream_cksum,
279 outfd) == -1)
280 goto out;
281 }
282 break;
283 }
284
285 case DRR_END:
286 {
287 /* use the recalculated checksum */
288 ZIO_SET_CHECKSUM(&drre->drr_checksum,
289 stream_cksum.zc_word[0], stream_cksum.zc_word[1],
290 stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
291 if ((write(outfd, drr,
292 sizeof (dmu_replay_record_t))) == -1)
293 goto out;
294 break;
295 }
296
297 case DRR_OBJECT:
298 {
299 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
300 &stream_cksum, outfd) == -1)
301 goto out;
302 if (drro->drr_bonuslen > 0) {
303 (void) ssread(buf,
304 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
305 ofp);
306 if (cksum_and_write(buf,
307 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
308 &stream_cksum, outfd) == -1)
309 goto out;
310 }
311 break;
312 }
313
314 case DRR_SPILL:
315 {
316 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
317 &stream_cksum, outfd) == -1)
318 goto out;
319 (void) ssread(buf, drrs->drr_length, ofp);
320 if (cksum_and_write(buf, drrs->drr_length,
321 &stream_cksum, outfd) == -1)
322 goto out;
323 break;
324 }
325
326 case DRR_FREEOBJECTS:
327 {
328 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
329 &stream_cksum, outfd) == -1)
330 goto out;
331 break;
332 }
333
334 case DRR_WRITE:
335 {
336 dataref_t dataref;
337
338 (void) ssread(buf, drrw->drr_length, ofp);
339
340 /*
341 * Use the existing checksum if it's dedup-capable,
342 * else calculate a SHA256 checksum for it.
343 */
344
345 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
346 zero_cksum) ||
347 !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
348 SHA256_CTX ctx;
373 /* block already present in stream */
374 wbr_drrr->drr_object = drrw->drr_object;
375 wbr_drrr->drr_offset = drrw->drr_offset;
376 wbr_drrr->drr_length = drrw->drr_length;
377 wbr_drrr->drr_toguid = drrw->drr_toguid;
378 wbr_drrr->drr_refguid = dataref.ref_guid;
379 wbr_drrr->drr_refobject =
380 dataref.ref_object;
381 wbr_drrr->drr_refoffset =
382 dataref.ref_offset;
383
384 wbr_drrr->drr_checksumtype =
385 drrw->drr_checksumtype;
386 wbr_drrr->drr_checksumflags =
387 drrw->drr_checksumtype;
388 wbr_drrr->drr_key.ddk_cksum =
389 drrw->drr_key.ddk_cksum;
390 wbr_drrr->drr_key.ddk_prop =
391 drrw->drr_key.ddk_prop;
392
393 if (cksum_and_write(&wbr_drr,
394 sizeof (dmu_replay_record_t), &stream_cksum,
395 outfd) == -1)
396 goto out;
397 } else {
398 /* block not previously seen */
399 if (cksum_and_write(drr,
400 sizeof (dmu_replay_record_t), &stream_cksum,
401 outfd) == -1)
402 goto out;
403 if (cksum_and_write(buf,
404 drrw->drr_length,
405 &stream_cksum, outfd) == -1)
406 goto out;
407 }
408 break;
409 }
410
411 case DRR_FREE:
412 {
413 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
414 &stream_cksum, outfd) == -1)
415 goto out;
416 break;
417 }
418
419 default:
420 (void) printf("INVALID record type 0x%x\n",
421 drr->drr_type);
422 /* should never happen, so assert */
423 assert(B_FALSE);
424 }
425 }
426 out:
427 umem_cache_destroy(ddt.ddecache);
428 free(ddt.dedup_hash_array);
429 free(buf);
430 (void) fclose(ofp);
431
432 return (NULL);
433 }
772 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
773 nvlist_free(sd.fss);
774 *nvlp = NULL;
775 return (EZFS_NOMEM);
776 }
777
778 *nvlp = sd.fss;
779 return (0);
780 }
781
782 /*
783 * Routines specific to "zfs send"
784 */
785 typedef struct send_dump_data {
786 /* these are all just the short snapname (the part after the @) */
787 const char *fromsnap;
788 const char *tosnap;
789 char prevsnap[ZFS_MAXNAMELEN];
790 uint64_t prevsnap_obj;
791 boolean_t seenfrom, seento, replicate, doall, fromorigin;
792 boolean_t verbose, dryrun, parsable, progress;
793 int outfd;
794 boolean_t err;
795 nvlist_t *fss;
796 avl_tree_t *fsavl;
797 snapfilter_cb_t *filter_cb;
798 void *filter_cb_arg;
799 nvlist_t *debugnv;
800 char holdtag[ZFS_MAXNAMELEN];
801 int cleanup_fd;
802 uint64_t size;
803 } send_dump_data_t;
804
805 static int
806 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
807 boolean_t fromorigin, uint64_t *sizep)
808 {
809 zfs_cmd_t zc = { 0 };
810 libzfs_handle_t *hdl = zhp->zfs_hdl;
811
812 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
851 case EROFS:
852 zfs_error_aux(hdl, strerror(errno));
853 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
854
855 default:
856 return (zfs_standard_error(hdl, errno, errbuf));
857 }
858 }
859
860 *sizep = zc.zc_objset_type;
861
862 return (0);
863 }
864
865 /*
866 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
867 * NULL) to the file descriptor specified by outfd.
868 */
869 static int
870 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
871 boolean_t fromorigin, int outfd, nvlist_t *debugnv)
872 {
873 zfs_cmd_t zc = { 0 };
874 libzfs_handle_t *hdl = zhp->zfs_hdl;
875 nvlist_t *thisdbg;
876
877 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
878 assert(fromsnap_obj == 0 || !fromorigin);
879
880 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
881 zc.zc_cookie = outfd;
882 zc.zc_obj = fromorigin;
883 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
884 zc.zc_fromobj = fromsnap_obj;
885
886 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
887 if (fromsnap && fromsnap[0] != '\0') {
888 VERIFY(0 == nvlist_add_string(thisdbg,
889 "fromsnap", fromsnap));
890 }
891
892 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
893 char errbuf[1024];
894 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
895 "warning: cannot send '%s'"), zhp->zfs_name);
896
897 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
898 if (debugnv) {
899 VERIFY(0 == nvlist_add_nvlist(debugnv,
900 zhp->zfs_name, thisdbg));
901 }
902 nvlist_free(thisdbg);
903
904 switch (errno) {
918
919 case EDQUOT:
920 case EFBIG:
921 case EIO:
922 case ENOLINK:
923 case ENOSPC:
924 case ENOSTR:
925 case ENXIO:
926 case EPIPE:
927 case ERANGE:
928 case EFAULT:
929 case EROFS:
930 zfs_error_aux(hdl, strerror(errno));
931 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
932
933 default:
934 return (zfs_standard_error(hdl, errno, errbuf));
935 }
936 }
937
938 if (debugnv)
939 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
940 nvlist_free(thisdbg);
941
942 return (0);
943 }
944
945 static int
946 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
947 {
948 zfs_handle_t *pzhp;
949 int error = 0;
950 char *thissnap;
951
952 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
953
954 if (sdd->dryrun)
955 return (0);
956
957 /*
1100 * exist, and the next accepted snapshot will be sent as
1101 * an incremental from the last accepted one, or as the
1102 * first (and full) snapshot in the case of a replication,
1103 * non-incremental send.
1104 */
1105 zfs_close(zhp);
1106 return (0);
1107 }
1108
1109 err = hold_for_send(zhp, sdd);
1110 if (err) {
1111 if (err == ENOENT)
1112 err = 0;
1113 zfs_close(zhp);
1114 return (err);
1115 }
1116
1117 fromorigin = sdd->prevsnap[0] == '\0' &&
1118 (sdd->fromorigin || sdd->replicate);
1119
1120 if (sdd->verbose) {
1121 uint64_t size;
1122 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1123 fromorigin, &size);
1124
1125 if (sdd->parsable) {
1126 if (sdd->prevsnap[0] != '\0') {
1127 (void) fprintf(stderr, "incremental\t%s\t%s",
1128 sdd->prevsnap, zhp->zfs_name);
1129 } else {
1130 (void) fprintf(stderr, "full\t%s",
1131 zhp->zfs_name);
1132 }
1133 } else {
1134 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1135 "send from @%s to %s"),
1136 sdd->prevsnap, zhp->zfs_name);
1137 }
1138 if (err == 0) {
1139 if (sdd->parsable) {
1140 (void) fprintf(stderr, "\t%llu\n",
1141 (longlong_t)size);
1142 } else {
1143 char buf[16];
1144 zfs_nicenum(size, buf, sizeof (buf));
1145 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1146 " estimated size is %s\n"), buf);
1147 }
1148 sdd->size += size;
1149 } else {
1150 (void) fprintf(stderr, "\n");
1151 }
1152 }
1153
1154 if (!sdd->dryrun) {
1155 /*
1156 * If progress reporting is requested, spawn a new thread to
1157 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1158 */
1159 if (sdd->progress) {
1160 pa.pa_zhp = zhp;
1161 pa.pa_fd = sdd->outfd;
1162 pa.pa_parsable = sdd->parsable;
1163
1164 if (err = pthread_create(&tid, NULL,
1165 send_progress_thread, &pa)) {
1166 zfs_close(zhp);
1167 return (err);
1168 }
1169 }
1170
1171 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1172 fromorigin, sdd->outfd, sdd->debugnv);
1173
1174 if (sdd->progress) {
1175 (void) pthread_cancel(tid);
1176 (void) pthread_join(tid, NULL);
1177 }
1178 }
1179
1180 (void) strcpy(sdd->prevsnap, thissnap);
1181 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1182 zfs_close(zhp);
1183 return (err);
1184 }
1185
1186 static int
1187 dump_filesystem(zfs_handle_t *zhp, void *arg)
1188 {
1189 int rv = 0;
1190 send_dump_data_t *sdd = arg;
1191 boolean_t missingfrom = B_FALSE;
1192 zfs_cmd_t zc = { 0 };
1193
1194 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1397
1398 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1399 uint64_t version;
1400 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1401 if (version >= ZPL_VERSION_SA) {
1402 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1403 }
1404 }
1405
1406 if (flags->dedup && !flags->dryrun) {
1407 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1408 DMU_BACKUP_FEATURE_DEDUPPROPS);
1409 if (err = pipe(pipefd)) {
1410 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1411 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1412 errbuf));
1413 }
1414 dda.outputfd = outfd;
1415 dda.inputfd = pipefd[1];
1416 dda.dedup_hdl = zhp->zfs_hdl;
1417 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1418 (void) close(pipefd[0]);
1419 (void) close(pipefd[1]);
1420 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1421 return (zfs_error(zhp->zfs_hdl,
1422 EZFS_THREADCREATEFAILED, errbuf));
1423 }
1424 }
1425
1426 if (flags->replicate || flags->doall || flags->props) {
1427 dmu_replay_record_t drr = { 0 };
1428 char *packbuf = NULL;
1429 size_t buflen = 0;
1430 zio_cksum_t zc = { 0 };
1431
1432 if (flags->replicate || flags->props) {
1433 nvlist_t *hdrnv;
1434
1435 VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1436 if (fromsnap) {
1457 if (err) {
1458 fsavl_destroy(fsavl);
1459 nvlist_free(fss);
1460 goto stderr_out;
1461 }
1462 }
1463
1464 if (!flags->dryrun) {
1465 /* write first begin record */
1466 drr.drr_type = DRR_BEGIN;
1467 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1468 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1469 drr_versioninfo, DMU_COMPOUNDSTREAM);
1470 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1471 drr_versioninfo, featureflags);
1472 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1473 sizeof (drr.drr_u.drr_begin.drr_toname),
1474 "%s@%s", zhp->zfs_name, tosnap);
1475 drr.drr_payloadlen = buflen;
1476 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1477
1478 /* write header nvlist */
1479 if (err != -1 && packbuf != NULL) {
1480 err = cksum_and_write(packbuf, buflen, &zc,
1481 outfd);
1482 }
1483 free(packbuf);
1484 if (err == -1) {
1485 fsavl_destroy(fsavl);
1486 nvlist_free(fss);
1487 err = errno;
1488 goto stderr_out;
1489 }
1490
1491 /* write end record */
1492 bzero(&drr, sizeof (drr));
1493 drr.drr_type = DRR_END;
1494 drr.drr_u.drr_end.drr_checksum = zc;
1495 err = write(outfd, &drr, sizeof (drr));
1496 if (err == -1) {
1497 fsavl_destroy(fsavl);
1498 nvlist_free(fss);
1499 err = errno;
1500 goto stderr_out;
1501 }
1502
1503 err = 0;
1504 }
1505 }
1506
1507 /* dump each stream */
1508 sdd.fromsnap = fromsnap;
1509 sdd.tosnap = tosnap;
1510 if (flags->dedup)
1511 sdd.outfd = pipefd[0];
1512 else
1513 sdd.outfd = outfd;
1514 sdd.replicate = flags->replicate;
1515 sdd.doall = flags->doall;
1516 sdd.fromorigin = flags->fromorigin;
1517 sdd.fss = fss;
1518 sdd.fsavl = fsavl;
1519 sdd.verbose = flags->verbose;
1520 sdd.parsable = flags->parsable;
1521 sdd.progress = flags->progress;
1522 sdd.dryrun = flags->dryrun;
1523 sdd.filter_cb = filter_func;
1524 sdd.filter_cb_arg = cb_arg;
1525 if (debugnvp)
1526 sdd.debugnv = *debugnvp;
1527
1528 /*
1529 * Some flags require that we place user holds on the datasets that are
1530 * being sent so they don't get destroyed during the send. We can skip
1531 * this step if the pool is imported read-only since the datasets cannot
1532 * be destroyed.
1533 */
1534 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1535 ZPOOL_PROP_READONLY, NULL) &&
1536 zfs_spa_version(zhp, &spa_version) == 0 &&
1537 spa_version >= SPA_VERSION_USERREFS &&
1538 (flags->doall || flags->replicate)) {
1539 ++holdseq;
1540 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1541 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1542 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1543 if (sdd.cleanup_fd < 0) {
1544 err = errno;
1545 goto stderr_out;
1546 }
1547 } else {
1548 sdd.cleanup_fd = -1;
1549 }
1550 if (flags->verbose) {
1551 /*
1552 * Do a verbose no-op dry run to get all the verbose output
1553 * before generating any data. Then do a non-verbose real
1554 * run to generate the streams.
1555 */
1556 sdd.dryrun = B_TRUE;
1557 err = dump_filesystems(zhp, &sdd);
1558 sdd.dryrun = flags->dryrun;
1559 sdd.verbose = B_FALSE;
1560 if (flags->parsable) {
1561 (void) fprintf(stderr, "size\t%llu\n",
1562 (longlong_t)sdd.size);
1563 } else {
1564 char buf[16];
1565 zfs_nicenum(sdd.size, buf, sizeof (buf));
1566 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1567 "total estimated size is %s\n"), buf);
1568 }
1569 }
1570 err = dump_filesystems(zhp, &sdd);
1571 fsavl_destroy(fsavl);
1572 nvlist_free(fss);
1573
1574 if (flags->dedup) {
1575 (void) close(pipefd[0]);
1576 (void) pthread_join(tid, NULL);
1577 }
1578
1579 if (sdd.cleanup_fd != -1) {
1580 VERIFY(0 == close(sdd.cleanup_fd));
1581 sdd.cleanup_fd = -1;
1582 }
1583
1584 if (!flags->dryrun && (flags->replicate || flags->doall ||
1585 flags->props)) {
1586 /*
1587 * write final end record. NB: want to do this even if
1588 * there was some error, because it might not be totally
1589 * failed.
1590 */
1591 dmu_replay_record_t drr = { 0 };
1592 drr.drr_type = DRR_END;
1593 if (write(outfd, &drr, sizeof (drr)) == -1) {
1594 return (zfs_standard_error(zhp->zfs_hdl,
1595 errno, errbuf));
1596 }
1597 }
1598
1599 return (err || sdd.err);
1600
1601 stderr_out:
1602 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1603 err_out:
1604 if (sdd.cleanup_fd != -1)
1605 VERIFY(0 == close(sdd.cleanup_fd));
1606 if (flags->dedup) {
1607 (void) pthread_cancel(tid);
1608 (void) pthread_join(tid, NULL);
1609 (void) close(pipefd[0]);
1610 }
1611 return (err);
1612 }
1613
1614 /*
1615 * Routines specific to "zfs recv"
1616 */
1617
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 #include <assert.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <libintl.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <strings.h>
36 #include <unistd.h>
37 #include <stddef.h>
38 #include <fcntl.h>
39 #include <sys/mount.h>
40 #include <pthread.h>
41 #include <umem.h>
42 #include <time.h>
43
44 #include <libzfs.h>
45
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "zfs_fletcher.h"
49 #include "libzfs_impl.h"
50 #include <sha2.h>
51 #include <sys/zio_checksum.h>
52 #include <sys/ddt.h>
53
54 /* in libzfs_dataset.c */
55 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
56
57 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
58 int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
59
60 static const zio_cksum_t zero_cksum = { 0 };
61
62 typedef struct dedup_arg {
63 int inputfd;
64 int outputfd;
65 uint64_t dedup_data_sz;
66 boolean_t sendsize;
67 libzfs_handle_t *dedup_hdl;
68 } dedup_arg_t;
69
70 typedef struct progress_arg {
71 zfs_handle_t *pa_zhp;
72 int pa_fd;
73 boolean_t pa_parsable;
74 } progress_arg_t;
75
/*
 * Reference to a data block that is already present in the stream.
 * cksummer() copies these values into the drr_refguid, drr_refobject and
 * drr_refoffset fields of a DRR_WRITE_BYREF record.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
81
82 typedef struct dedup_entry {
83 struct dedup_entry *dde_next;
84 zio_cksum_t dde_chksum;
85 uint64_t dde_prop;
86 dataref_t dde_ref;
170 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
171 ddepp = &((*ddepp)->dde_next)) {
172 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
173 (*ddepp)->dde_prop == prop) {
174 *dr = (*ddepp)->dde_ref;
175 return (B_TRUE);
176 }
177 }
178 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
179 return (B_FALSE);
180 }
181
182 static int
183 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
184 {
185 fletcher_4_incremental_native(buf, len, zc);
186 return (write(outfd, buf, len));
187 }
188
189 /*
190 * the function used by the cksummer thread that needs to know
191 * about the sendsize flag
192 */
193 static int
194 dedup_cksum_and_write(dedup_arg_t *dda, const void *buf, uint64_t len,
195 zio_cksum_t *zc, int outfd)
196 {
197 int ret = len;
198
199 dda->dedup_data_sz += len;
200 fletcher_4_incremental_native(buf, len, zc);
201 if (!dda->sendsize)
202 ret = (write(outfd, buf, len));
203
204 return (ret);
205 }
206
207 /*
208 * This function is started in a separate thread when the dedup option
209 * has been requested. The main send thread determines the list of
210 * snapshots to be included in the send stream and makes the ioctl calls
211 * for each one. But instead of having the ioctl send the output to
212 * the output fd specified by the caller of zfs_send(), the
213 * ioctl is told to direct the output to a pipe, which is read by the
214 * alternate thread running THIS function. This function does the
215 * dedup'ing by:
216 * 1. building a dedup table (the DDT)
217 * 2. doing checksums on each data block and inserting a record in the DDT
218 * 3. looking for matching checksums, and
219 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
220 * a duplicate block is found.
221 * The output of this function then goes to the output fd requested
222 * by the caller of zfs_send().
223 */
224 static void *
225 cksummer(void *arg)
226 {
227 dedup_arg_t *dda = arg;
265 /* Initialize the write-by-reference block. */
266 wbr_drr.drr_type = DRR_WRITE_BYREF;
267 wbr_drr.drr_payloadlen = 0;
268
269 outfd = dda->outputfd;
270 ofp = fdopen(dda->inputfd, "r");
271 while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
272
273 switch (drr->drr_type) {
274 case DRR_BEGIN:
275 {
276 int fflags;
277 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
278
279 /* set the DEDUP feature flag for this stream */
280 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
281 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
282 DMU_BACKUP_FEATURE_DEDUPPROPS);
283 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
284
285 if (dedup_cksum_and_write(dda, drr,
286 sizeof (dmu_replay_record_t),
287 &stream_cksum, outfd) == -1)
288 goto out;
289 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
290 DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
291 int sz = drr->drr_payloadlen;
292
293 if (sz > 1<<20) {
294 free(buf);
295 buf = malloc(sz);
296 }
297 (void) ssread(buf, sz, ofp);
298 if (ferror(stdin))
299 perror("fread");
300 if (dedup_cksum_and_write(dda, buf, sz,
301 &stream_cksum, outfd) == -1)
302 goto out;
303 }
304 break;
305 }
306
307 case DRR_END:
308 {
309 /* use the recalculated checksum */
310 ZIO_SET_CHECKSUM(&drre->drr_checksum,
311 stream_cksum.zc_word[0], stream_cksum.zc_word[1],
312 stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
313 if ((write(outfd, drr,
314 sizeof (dmu_replay_record_t))) == -1)
315 goto out;
316 dda->dedup_data_sz += sizeof (dmu_replay_record_t);
317 break;
318 }
319
320 case DRR_OBJECT:
321 {
322 if (dedup_cksum_and_write(dda, drr,
323 sizeof (dmu_replay_record_t),
324 &stream_cksum, outfd) == -1)
325 goto out;
326 if (drro->drr_bonuslen > 0) {
327 (void) ssread(buf,
328 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
329 ofp);
330 if (dedup_cksum_and_write(dda, buf,
331 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
332 &stream_cksum, outfd) == -1)
333 goto out;
334 }
335 break;
336 }
337
338 case DRR_SPILL:
339 {
340 if (dedup_cksum_and_write(dda, drr,
341 sizeof (dmu_replay_record_t),
342 &stream_cksum, outfd) == -1)
343 goto out;
344 (void) ssread(buf, drrs->drr_length, ofp);
345 if (dedup_cksum_and_write(dda, buf, drrs->drr_length,
346 &stream_cksum, outfd) == -1)
347 goto out;
348 break;
349 }
350
351 case DRR_FREEOBJECTS:
352 {
353 if (dedup_cksum_and_write(dda, drr,
354 sizeof (dmu_replay_record_t),
355 &stream_cksum, outfd) == -1)
356 goto out;
357 break;
358 }
359
360 case DRR_WRITE:
361 {
362 dataref_t dataref;
363
364 (void) ssread(buf, drrw->drr_length, ofp);
365
366 /*
367 * Use the existing checksum if it's dedup-capable,
368 * else calculate a SHA256 checksum for it.
369 */
370
371 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
372 zero_cksum) ||
373 !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
374 SHA256_CTX ctx;
399 /* block already present in stream */
400 wbr_drrr->drr_object = drrw->drr_object;
401 wbr_drrr->drr_offset = drrw->drr_offset;
402 wbr_drrr->drr_length = drrw->drr_length;
403 wbr_drrr->drr_toguid = drrw->drr_toguid;
404 wbr_drrr->drr_refguid = dataref.ref_guid;
405 wbr_drrr->drr_refobject =
406 dataref.ref_object;
407 wbr_drrr->drr_refoffset =
408 dataref.ref_offset;
409
410 wbr_drrr->drr_checksumtype =
411 drrw->drr_checksumtype;
412 wbr_drrr->drr_checksumflags =
413 drrw->drr_checksumtype;
414 wbr_drrr->drr_key.ddk_cksum =
415 drrw->drr_key.ddk_cksum;
416 wbr_drrr->drr_key.ddk_prop =
417 drrw->drr_key.ddk_prop;
418
419 if (dedup_cksum_and_write(dda, &wbr_drr,
420 sizeof (dmu_replay_record_t), &stream_cksum,
421 outfd) == -1)
422 goto out;
423 } else {
424 /* block not previously seen */
425 if (dedup_cksum_and_write(dda, drr,
426 sizeof (dmu_replay_record_t), &stream_cksum,
427 outfd) == -1)
428 goto out;
429 if (dedup_cksum_and_write(dda, buf,
430 drrw->drr_length,
431 &stream_cksum, outfd) == -1)
432 goto out;
433 }
434 break;
435 }
436
437 case DRR_FREE:
438 {
439 if (dedup_cksum_and_write(dda, drr,
440 sizeof (dmu_replay_record_t),
441 &stream_cksum, outfd) == -1)
442 goto out;
443 break;
444 }
445
446 default:
447 (void) printf("INVALID record type 0x%x\n",
448 drr->drr_type);
449 /* should never happen, so assert */
450 assert(B_FALSE);
451 }
452 }
453 out:
454 umem_cache_destroy(ddt.ddecache);
455 free(ddt.dedup_hash_array);
456 free(buf);
457 (void) fclose(ofp);
458
459 return (NULL);
460 }
799 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
800 nvlist_free(sd.fss);
801 *nvlp = NULL;
802 return (EZFS_NOMEM);
803 }
804
805 *nvlp = sd.fss;
806 return (0);
807 }
808
809 /*
810 * Routines specific to "zfs send"
811 */
812 typedef struct send_dump_data {
813 /* these are all just the short snapname (the part after the @) */
814 const char *fromsnap;
815 const char *tosnap;
816 char prevsnap[ZFS_MAXNAMELEN];
817 uint64_t prevsnap_obj;
818 boolean_t seenfrom, seento, replicate, doall, fromorigin;
819 boolean_t verbose, dryrun, dedup, parsable, progress;
820 boolean_t sendsize;
821 uint32_t hdr_send_sz;
822 uint64_t send_sz;
823 int outfd;
824 boolean_t err;
825 nvlist_t *fss;
826 avl_tree_t *fsavl;
827 snapfilter_cb_t *filter_cb;
828 void *filter_cb_arg;
829 nvlist_t *debugnv;
830 char holdtag[ZFS_MAXNAMELEN];
831 int cleanup_fd;
832 uint64_t size;
833 } send_dump_data_t;
834
835 static int
836 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
837 boolean_t fromorigin, uint64_t *sizep)
838 {
839 zfs_cmd_t zc = { 0 };
840 libzfs_handle_t *hdl = zhp->zfs_hdl;
841
842 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
881 case EROFS:
882 zfs_error_aux(hdl, strerror(errno));
883 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
884
885 default:
886 return (zfs_standard_error(hdl, errno, errbuf));
887 }
888 }
889
890 *sizep = zc.zc_objset_type;
891
892 return (0);
893 }
894
895 /*
896 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
897 * NULL) to the file descriptor specified by outfd.
898 */
899 static int
900 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
901 boolean_t fromorigin, int outfd, nvlist_t *debugnv,
902 boolean_t sendsize, uint64_t *sendcounter)
903 {
904 zfs_cmd_t zc = { 0 };
905 libzfs_handle_t *hdl = zhp->zfs_hdl;
906 nvlist_t *thisdbg;
907
908 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
909 assert(fromsnap_obj == 0 || !fromorigin);
910
911 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
912 zc.zc_cookie = outfd;
913 zc.zc_obj = fromorigin;
914 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
915 zc.zc_fromobj = fromsnap_obj;
916 zc.zc_sendsize = sendsize;
917 zc.zc_sendcounter = 0;
918
919 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
920 if (fromsnap && fromsnap[0] != '\0') {
921 VERIFY(0 == nvlist_add_string(thisdbg,
922 "fromsnap", fromsnap));
923 }
924
925 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
926 char errbuf[1024];
927 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
928 "warning: cannot send '%s'"), zhp->zfs_name);
929
930 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
931 if (debugnv) {
932 VERIFY(0 == nvlist_add_nvlist(debugnv,
933 zhp->zfs_name, thisdbg));
934 }
935 nvlist_free(thisdbg);
936
937 switch (errno) {
951
952 case EDQUOT:
953 case EFBIG:
954 case EIO:
955 case ENOLINK:
956 case ENOSPC:
957 case ENOSTR:
958 case ENXIO:
959 case EPIPE:
960 case ERANGE:
961 case EFAULT:
962 case EROFS:
963 zfs_error_aux(hdl, strerror(errno));
964 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
965
966 default:
967 return (zfs_standard_error(hdl, errno, errbuf));
968 }
969 }
970
971 *sendcounter = (uint64_t)zc.zc_sendcounter;
972 if (debugnv)
973 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
974 nvlist_free(thisdbg);
975
976 return (0);
977 }
978
979 static int
980 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
981 {
982 zfs_handle_t *pzhp;
983 int error = 0;
984 char *thissnap;
985
986 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
987
988 if (sdd->dryrun)
989 return (0);
990
991 /*
1134 * exist, and the next accepted snapshot will be sent as
1135 * an incremental from the last accepted one, or as the
1136 * first (and full) snapshot in the case of a replication,
1137 * non-incremental send.
1138 */
1139 zfs_close(zhp);
1140 return (0);
1141 }
1142
1143 err = hold_for_send(zhp, sdd);
1144 if (err) {
1145 if (err == ENOENT)
1146 err = 0;
1147 zfs_close(zhp);
1148 return (err);
1149 }
1150
1151 fromorigin = sdd->prevsnap[0] == '\0' &&
1152 (sdd->fromorigin || sdd->replicate);
1153
1154 /* print out to-from and approximate size in verbose mode */
1155 if (sdd->verbose) {
1156 /* print preamble */
1157 if (sdd->parsable) {
1158 if (sdd->prevsnap[0] != '\0') {
1159 (void) fprintf(stderr, "incremental\t%s\t%s",
1160 sdd->prevsnap, zhp->zfs_name);
1161 } else {
1162 (void) fprintf(stderr, "full\t%s",
1163 zhp->zfs_name);
1164 }
1165 } else {
1166 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1167 "send from @%s to %s"),
1168 sdd->prevsnap, zhp->zfs_name);
1169 }
1170
1171 if (sdd->sendsize) {
1172 /*
1173 * we are going to print out the exact stream size info,
1174 * so skip the estimate
1175 */
1176 (void) fprintf(stderr, "\n");
1177 } else {
1178 /*
1179 * provide stream size estimate otherwise
1180 */
1181 uint64_t size;
1182 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1183 fromorigin, &size);
1184
1185 if (err == 0) {
1186 if (sdd->parsable) {
1187 (void) fprintf(stderr, "\t%llu\n",
1188 (longlong_t)size);
1189 } else {
1190 char buf[16];
1191 zfs_nicenum(size, buf, sizeof (buf));
1192 (void) fprintf(stderr,
1193 dgettext(TEXT_DOMAIN,
1194 " estimated size is %s\n"),
1195 buf);
1196 }
1197 sdd->size += size;
1198 } else {
1199 /* could not estimate */
1200 (void) fprintf(stderr, "\n");
1201 }
1202 }
1203 }
1204
1205 if (!sdd->dryrun) {
1206 uint64_t sendcounter = 0;
1207 boolean_t track_progress = (sdd->progress && !sdd->sendsize);
1208 boolean_t sendsize = B_FALSE;
1209 /*
1210 * If progress reporting is requested, spawn a new thread to
1211 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1212 */
1213 if (track_progress) {
1214 pa.pa_zhp = zhp;
1215 pa.pa_fd = sdd->outfd;
1216 pa.pa_parsable = sdd->parsable;
1217
1218 if (err = pthread_create(&tid, NULL,
1219 send_progress_thread, &pa)) {
1220 zfs_close(zhp);
1221 return (err);
1222 }
1223 }
1224
1225
1226 /*
1227 * We need to reset the sendsize flag being sent to
1228 * kernel if sdd->dedup is set. With dedup, the file
1229 * descriptor sent to kernel is one end of the pipe,
1230 * and we would want the data back in the pipe for
1231 * cksummer() to calculate the exact size of the dedup-ed
1232 * stream. So reset the sendsize flag such that
1233 * kernel writes to the pipe.
1234 */
1235
1236 sendsize = sdd->dedup ? B_FALSE : sdd->sendsize;
1237
1238 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1239 fromorigin, sdd->outfd, sdd->debugnv,
1240 sendsize, &sendcounter);
1241
1242 sdd->send_sz += sendcounter;
1243
1244 if (track_progress) {
1245 (void) pthread_cancel(tid);
1246 (void) pthread_join(tid, NULL);
1247 }
1248 }
1249
1250 (void) strcpy(sdd->prevsnap, thissnap);
1251 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1252 zfs_close(zhp);
1253 return (err);
1254 }
1255
1256 static int
1257 dump_filesystem(zfs_handle_t *zhp, void *arg)
1258 {
1259 int rv = 0;
1260 send_dump_data_t *sdd = arg;
1261 boolean_t missingfrom = B_FALSE;
1262 zfs_cmd_t zc = { 0 };
1263
1264 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1467
1468 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1469 uint64_t version;
1470 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1471 if (version >= ZPL_VERSION_SA) {
1472 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1473 }
1474 }
1475
1476 if (flags->dedup && !flags->dryrun) {
1477 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1478 DMU_BACKUP_FEATURE_DEDUPPROPS);
1479 if (err = pipe(pipefd)) {
1480 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1481 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1482 errbuf));
1483 }
1484 dda.outputfd = outfd;
1485 dda.inputfd = pipefd[1];
1486 dda.dedup_hdl = zhp->zfs_hdl;
1487 dda.sendsize = flags->sendsize;
1488 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1489 (void) close(pipefd[0]);
1490 (void) close(pipefd[1]);
1491 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1492 return (zfs_error(zhp->zfs_hdl,
1493 EZFS_THREADCREATEFAILED, errbuf));
1494 }
1495 }
1496
1497 if (flags->replicate || flags->doall || flags->props) {
1498 dmu_replay_record_t drr = { 0 };
1499 char *packbuf = NULL;
1500 size_t buflen = 0;
1501 zio_cksum_t zc = { 0 };
1502
1503 if (flags->replicate || flags->props) {
1504 nvlist_t *hdrnv;
1505
1506 VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1507 if (fromsnap) {
1528 if (err) {
1529 fsavl_destroy(fsavl);
1530 nvlist_free(fss);
1531 goto stderr_out;
1532 }
1533 }
1534
1535 if (!flags->dryrun) {
1536 /* write first begin record */
1537 drr.drr_type = DRR_BEGIN;
1538 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1539 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1540 drr_versioninfo, DMU_COMPOUNDSTREAM);
1541 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1542 drr_versioninfo, featureflags);
1543 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1544 sizeof (drr.drr_u.drr_begin.drr_toname),
1545 "%s@%s", zhp->zfs_name, tosnap);
1546 drr.drr_payloadlen = buflen;
1547 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1548 sdd.hdr_send_sz += sizeof (drr);
1549
1550 /* write header nvlist */
1551 if (err != -1 && packbuf != NULL) {
1552 err = cksum_and_write(packbuf, buflen, &zc,
1553 outfd);
1554 sdd.hdr_send_sz += buflen;
1555 }
1556 free(packbuf);
1557 if (err == -1) {
1558 fsavl_destroy(fsavl);
1559 nvlist_free(fss);
1560 err = errno;
1561 goto stderr_out;
1562 }
1563
1564 /* write end record */
1565 bzero(&drr, sizeof (drr));
1566 drr.drr_type = DRR_END;
1567 drr.drr_u.drr_end.drr_checksum = zc;
1568 err = write(outfd, &drr, sizeof (drr));
1569 sdd.hdr_send_sz += sizeof (drr);
1570 if (err == -1) {
1571 fsavl_destroy(fsavl);
1572 nvlist_free(fss);
1573 err = errno;
1574 goto stderr_out;
1575 }
1576
1577 err = 0;
1578 }
1579 }
1580
1581 /* dump each stream */
1582 sdd.fromsnap = fromsnap;
1583 sdd.tosnap = tosnap;
1584 if (flags->dedup)
1585 sdd.outfd = pipefd[0];
1586 else
1587 sdd.outfd = outfd;
1588 sdd.replicate = flags->replicate;
1589 sdd.doall = flags->doall;
1590 sdd.fromorigin = flags->fromorigin;
1591 sdd.fss = fss;
1592 sdd.fsavl = fsavl;
1593 sdd.verbose = flags->verbose;
1594 sdd.dedup = flags->dedup;
1595 sdd.sendsize = flags->sendsize;
1596 sdd.parsable = flags->parsable;
1597 sdd.progress = flags->progress;
1598 sdd.dryrun = flags->dryrun;
1599 sdd.filter_cb = filter_func;
1600 sdd.filter_cb_arg = cb_arg;
1601 if (debugnvp)
1602 sdd.debugnv = *debugnvp;
1603
1604 /*
1605 * Some flags require that we place user holds on the datasets that are
1606 * being sent so they don't get destroyed during the send. We can skip
1607 * this step if the pool is imported read-only since the datasets cannot
1608 * be destroyed.
1609 */
1610 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1611 ZPOOL_PROP_READONLY, NULL) &&
1612 zfs_spa_version(zhp, &spa_version) == 0 &&
1613 spa_version >= SPA_VERSION_USERREFS &&
1614 (flags->doall || flags->replicate)) {
1615 ++holdseq;
1616 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1617 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1618 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1619 if (sdd.cleanup_fd < 0) {
1620 err = errno;
1621 goto stderr_out;
1622 }
1623 } else {
1624 sdd.cleanup_fd = -1;
1625 }
1626 if (flags->verbose && !flags->sendsize) {
1627 /*
1628 * Do a verbose no-op dry run to get all the verbose output
1629 * before generating any data. Then do a non-verbose real
1630 * run to generate the streams.
1631 */
1632 sdd.dryrun = B_TRUE;
1633 err = dump_filesystems(zhp, &sdd);
1634 sdd.dryrun = flags->dryrun;
1635 sdd.verbose = B_FALSE;
1636 if (flags->parsable) {
1637 (void) fprintf(stderr, "size\t%llu\n",
1638 (longlong_t)sdd.size);
1639 } else {
1640 char buf[16];
1641 zfs_nicenum(sdd.size, buf, sizeof (buf));
1642 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1643 "total estimated size is %s\n"), buf);
1644 }
1645 }
1646 err = dump_filesystems(zhp, &sdd);
1647 fsavl_destroy(fsavl);
1648 nvlist_free(fss);
1649
1650 if (flags->dedup) {
1651 (void) close(pipefd[0]);
1652 (void) pthread_join(tid, NULL);
1653 sdd.send_sz = dda.dedup_data_sz;
1654 }
1655
1656 if (sdd.cleanup_fd != -1) {
1657 VERIFY(0 == close(sdd.cleanup_fd));
1658 sdd.cleanup_fd = -1;
1659 }
1660
1661 if (!flags->dryrun && (flags->replicate || flags->doall ||
1662 flags->props)) {
1663 /*
1664 * Write the final end record. NB: we want to do this even if
1665 * there was some error, because the send might not have failed
1666 * entirely.
1667 */
1668 dmu_replay_record_t drr = { 0 };
1669 drr.drr_type = DRR_END;
1670 if (write(outfd, &drr, sizeof (drr)) == -1) {
1671 return (zfs_standard_error(zhp->zfs_hdl,
1672 errno, errbuf));
1673 }
1674 sdd.hdr_send_sz += sizeof (drr);
1675 }
1676
1677 if (flags->sendsize) {
1678 if (flags->verbose) {
1679 fprintf(stderr, "Send stream header size (bytes): "
1680 "%u\n", sdd.hdr_send_sz);
1681 fprintf(stderr, "Send stream data size (bytes): "
1682 "%llu\n", sdd.send_sz);
1683 fprintf(stderr, "Total send stream size (bytes): "
1684 "%llu\n", sdd.send_sz + (uint64_t)sdd.hdr_send_sz);
1685 } else {
1686 fprintf(stderr, "Total send stream size (bytes): "
1687 "%llu\n", sdd.send_sz + (uint64_t)sdd.hdr_send_sz);
1688 }
1689 }
1690
1691 return (err || sdd.err);
1692
1693 stderr_out:
1694 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1695 err_out:
1696 if (sdd.cleanup_fd != -1)
1697 VERIFY(0 == close(sdd.cleanup_fd));
1698 if (flags->dedup) {
1699 (void) pthread_cancel(tid);
1700 (void) pthread_join(tid, NULL);
1701 (void) close(pipefd[0]);
1702 }
1703 return (err);
1704 }
1705
1706 /*
1707 * Routines specific to "zfs recv"
1708 */
1709
|